diff --git a/Documentation/ABI/testing/sysfs-devices-memory b/Documentation/ABI/testing/sysfs-devices-memory index 246a45b96d22..58dbc592bc57 100644 --- a/Documentation/ABI/testing/sysfs-devices-memory +++ b/Documentation/ABI/testing/sysfs-devices-memory @@ -26,8 +26,9 @@ Date: September 2008 Contact: Badari Pulavarty Description: The file /sys/devices/system/memory/memoryX/phys_device - is read-only and is designed to show the name of physical - memory device. Implementation is currently incomplete. + is read-only; it is a legacy interface only ever used on s390x + to expose the covered storage increment. +Users: Legacy s390-tools lsmem/chmem What: /sys/devices/system/memory/memoryX/phys_index Date: September 2008 diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 5c4432c96c4b..245739f55ac7 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -160,8 +160,8 @@ Under each memory block, you can see 5 files: "online_movable", "online", "offline" command which will be performed on all sections in the block. -``phys_device`` read-only: designed to show the name of physical memory - device. This is not well implemented now. +``phys_device`` read-only: legacy interface only ever used on s390x to + expose the covered storage increment. ``removable`` read-only: contains an integer value indicating whether the memory block is removable or not removable. A value of 1 indicates that the memory diff --git a/Documentation/admin-guide/perf/arm-cmn.rst b/Documentation/admin-guide/perf/arm-cmn.rst index 0e4809346014..796e25b7027b 100644 --- a/Documentation/admin-guide/perf/arm-cmn.rst +++ b/Documentation/admin-guide/perf/arm-cmn.rst @@ -17,7 +17,7 @@ PMU events ---------- The PMU driver registers a single PMU device for the whole interconnect, -see /sys/bus/event_source/devices/arm_cmn. Multi-chip systems may link +see /sys/bus/event_source/devices/arm_cmn_0. Multi-chip systems may link more than one CMN together via external CCIX links - in this situation, each mesh counts its own events entirely independently, and additional PMU devices will be named arm_cmn_{1..n}. diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index e35a3f2fb006..16826b31debb 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -69,6 +69,8 @@ Currently, these files are in /proc/sys/vm: - stat_refresh - numa_stat - swappiness +- unevictable_file_kbytes_low +- unevictable_file_kbytes_min - unprivileged_userfaultfd - user_reserve_kbytes - vfs_cache_pressure @@ -886,6 +888,31 @@ calls without any restrictions. The default value is 0. +unevictable_file_kbytes_low +=========================== + +Keep some file pages still mapped under memory pressure to avoid potential +disk thrashing that may occur due to evicting running executables code. +This implements soft eviction throttling, and some file pages can still +be discarded. + +Setting it to 0 effectively disables this feature. + +The default value is 256 MiB. + + +unevictable_file_kbytes_min +=========================== + +Keep all file pages still mapped under memory pressure to avoid potential +disk thrashing that may occur due to evicting running executables code. +This is the hard limit. + +Setting it to 0 effectively disables this feature. + +The default value is 128 MiB. + + user_reserve_kbytes =================== @@ -983,11 +1010,11 @@ that benefit from having their data cached, zone_reclaim_mode should be left disabled as the caching effect is likely to be more important than data locality. -zone_reclaim may be enabled if it's known that the workload is partitioned -such that each partition fits within a NUMA node and that accessing remote -memory would cause a measurable performance reduction. The page allocator -will then reclaim easily reusable pages (those page cache pages that are -currently not used) before allocating off node pages. +Consider enabling one or more zone_reclaim mode bits if it's known that the +workload is partitioned such that each partition fits within a NUMA node +and that accessing remote memory would cause a measurable performance +reduction. The page allocator will take additional actions before +allocating off node pages. Allowing zone reclaim to write out pages stops processes that are writing large amounts of data from dirtying pages on other nodes. Zone diff --git a/Documentation/block/bfq-iosched.rst b/Documentation/block/bfq-iosched.rst index 19d4d1570cee..66c5a4e54130 100644 --- a/Documentation/block/bfq-iosched.rst +++ b/Documentation/block/bfq-iosched.rst @@ -430,13 +430,13 @@ fifo_expire_async ----------------- This parameter is used to set the timeout of asynchronous requests. Default -value of this is 248ms. +value of this is 250ms. fifo_expire_sync ---------------- This parameter is used to set the timeout of synchronous requests. Default -value of this is 124ms. In case to favor synchronous requests over asynchronous +value of this is 125ms. In case to favor synchronous requests over asynchronous one, this value should be decreased relative to fifo_expire_async. low_latency diff --git a/Documentation/devicetree/bindings/net/btusb.txt b/Documentation/devicetree/bindings/net/btusb.txt index b1ad6ee68e90..c51dd99dc0d3 100644 --- a/Documentation/devicetree/bindings/net/btusb.txt +++ b/Documentation/devicetree/bindings/net/btusb.txt @@ -38,7 +38,7 @@ Following example uses irq pin number 3 of gpio0 for out of band wake-on-bt: compatible = "usb1286,204e"; reg = <1>; interrupt-parent = <&gpio0>; - interrupt-name = "wakeup"; + interrupt-names = "wakeup"; interrupts = <3 IRQ_TYPE_LEVEL_LOW>; }; }; diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index dac4aadb6e2e..880e55f7a4b1 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -205,6 +205,11 @@ properties: Indicates that full-duplex is used. When absent, half duplex is assumed. + pause: + $ref: /schemas/types.yaml#definitions/flag + description: + Indicates that pause should be enabled. + asym-pause: $ref: /schemas/types.yaml#/definitions/flag description: diff --git a/Documentation/filesystems/ntfs3.rst b/Documentation/filesystems/ntfs3.rst new file mode 100644 index 000000000000..fb29067360cc --- /dev/null +++ b/Documentation/filesystems/ntfs3.rst @@ -0,0 +1,107 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===== +NTFS3 +===== + + +Summary and Features +==================== + +NTFS3 is fully functional NTFS Read-Write driver. The driver works with +NTFS versions up to 3.1, normal/compressed/sparse files +and journal replaying. File system type to use on mount is 'ntfs3'. + +- This driver implements NTFS read/write support for normal, sparse and + compressed files. +- Supports native journal replaying; +- Supports extended attributes + Predefined extended attributes: + - 'system.ntfs_security' gets/sets security + descriptor (SECURITY_DESCRIPTOR_RELATIVE) + - 'system.ntfs_attrib' gets/sets ntfs file/dir attributes. + Note: applied to empty files, this allows to switch type between + sparse(0x200), compressed(0x800) and normal; +- Supports NFS export of mounted NTFS volumes. + +Mount Options +============= + +The list below describes mount options supported by NTFS3 driver in addition to +generic ones. + +=============================================================================== + +nls=name This option informs the driver how to interpret path + strings and translate them to Unicode and back. If + this option is not set, the default codepage will be + used (CONFIG_NLS_DEFAULT). + Examples: + 'nls=utf8' + +uid= +gid= +umask= Controls the default permissions for files/directories created + after the NTFS volume is mounted. + +fmask= +dmask= Instead of specifying umask which applies both to + files and directories, fmask applies only to files and + dmask only to directories. + +nohidden Files with the Windows-specific HIDDEN (FILE_ATTRIBUTE_HIDDEN) + attribute will not be shown under Linux. + +sys_immutable Files with the Windows-specific SYSTEM + (FILE_ATTRIBUTE_SYSTEM) attribute will be marked as system + immutable files. + +discard Enable support of the TRIM command for improved performance + on delete operations, which is recommended for use with the + solid-state drives (SSD). + +force Forces the driver to mount partitions even if 'dirty' flag + (volume dirty) is set. Not recommended for use. + +sparse Create new files as "sparse". + +showmeta Use this parameter to show all meta-files (System Files) on + a mounted NTFS partition. + By default, all meta-files are hidden. + +prealloc Preallocate space for files excessively when file size is + increasing on writes. Decreases fragmentation in case of + parallel write operations to different files. + +no_acs_rules "No access rules" mount option sets access rights for + files/folders to 777 and owner/group to root. This mount + option absorbs all other permissions: + - permissions change for files/folders will be reported + as successful, but they will remain 777; + - owner/group change will be reported as successful, but + they will stay as root + +acl Support POSIX ACLs (Access Control Lists). Effective if + supported by Kernel. Not to be confused with NTFS ACLs. + The option specified as acl enables support for POSIX ACLs. + +noatime All files and directories will not update their last access + time attribute if a partition is mounted with this parameter. + This option can speed up file system operation. + +=============================================================================== + +ToDo list +========= + +- Full journaling support (currently journal replaying is supported) over JBD. + + +References +========== +https://www.paragon-software.com/home/ntfs-linux-professional/ + - Commercial version of the NTFS driver for Linux. + +almaz.alexandrovich@paragon-software.com + - Direct e-mail address for feedback and requests on the NTFS3 implementation. + diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 2fa69f710e2a..ffbe603152cb 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -47,6 +47,7 @@ fixes/update part 1.1 Stefani Seibold June 9 2009 3.10 /proc//timerslack_ns - Task timerslack value 3.11 /proc//patch_state - Livepatch patch operation state 3.12 /proc//arch_status - Task architecture specific information + 3.13 /proc//ksm - Remote KSM 4 Configuring procfs 4.1 Mount options @@ -2131,6 +2132,19 @@ AVX512_elapsed_ms the task is unlikely an AVX512 user, but depends on the workload and the scheduling scenario, it also could be a false negative mentioned above. +3.13 /proc//ksm - Remote KSM +------------------------------------ +This write-only file allows marking memory of another task for merging +and unmerging via KSM. + +The following actions are available: + + * mark task's memory as mergeable: + # echo merge > /proc//ksm + + * unmerging all the task's memory: + # echo unmerge > /proc//ksm + Chapter 4: Configuring procfs ============================= diff --git a/Documentation/filesystems/seq_file.rst b/Documentation/filesystems/seq_file.rst index 56856481dc8d..a6726082a7c2 100644 --- a/Documentation/filesystems/seq_file.rst +++ b/Documentation/filesystems/seq_file.rst @@ -217,6 +217,12 @@ between the calls to start() and stop(), so holding a lock during that time is a reasonable thing to do. The seq_file code will also avoid taking any other locks while the iterator is active. +The iterater value returned by start() or next() is guaranteed to be +passed to a subsequent next() or stop() call. This allows resources +such as locks that were taken to be reliably released. There is *no* +guarantee that the iterator will be passed to show(), though in practice +it often will be. + Formatted output ================ diff --git a/Documentation/gpu/todo.rst b/Documentation/gpu/todo.rst index 009d8e6c7e3c..1cb7b9f9356e 100644 --- a/Documentation/gpu/todo.rst +++ b/Documentation/gpu/todo.rst @@ -594,6 +594,27 @@ Some of these date from the very introduction of KMS in 2008 ... Level: Intermediate +Remove automatic page mapping from dma-buf importing +---------------------------------------------------- + +When importing dma-bufs, the dma-buf and PRIME frameworks automatically map +imported pages into the importer's DMA area. drm_gem_prime_fd_to_handle() and +drm_gem_prime_handle_to_fd() require that importers call dma_buf_attach() +even if they never do actual device DMA, but only CPU access through +dma_buf_vmap(). This is a problem for USB devices, which do not support DMA +operations. + +To fix the issue, automatic page mappings should be removed from the +buffer-sharing code. Fixing this is a bit more involved, since the import/export +cache is also tied to &drm_gem_object.import_attach. Meanwhile we paper over +this problem for USB devices by fishing out the USB host controller device, as +long as that supports DMA. Otherwise importing can still needlessly fail. + +Contact: Thomas Zimmermann , Daniel Vetter + +Level: Advanced + + Better Testing ============== diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index fa544e9037b9..1b7f8debada6 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -630,16 +630,15 @@ tcp_rmem - vector of 3 INTEGERs: min, default, max default: initial size of receive buffer used by TCP sockets. This value overrides net.core.rmem_default used by other protocols. - Default: 87380 bytes. This value results in window of 65535 with - default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit - less for default tcp_app_win. See below about these variables. + Default: 131072 bytes. + This value results in initial window of 65535. max: maximal size of receive buffer allowed for automatically selected receiver buffers for TCP socket. This value does not override net.core.rmem_max. Calling setsockopt() with SO_RCVBUF disables automatic tuning of that socket's receive buffer size, in which case this value is ignored. - Default: between 87380B and 6MB, depending on RAM size. + Default: between 131072 and 6MB, depending on RAM size. tcp_sack - BOOLEAN Enable select acknowledgments (SACKS). diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index ae2ae37cd921..a1a3fc7b2a4e 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -142,73 +142,13 @@ Please send incremental versions on top of what has been merged in order to fix the patches the way they would look like if your latest patch series was to be merged. -How can I tell what patches are queued up for backporting to the various stable releases? ------------------------------------------------------------------------------------------ -Normally Greg Kroah-Hartman collects stable commits himself, but for -networking, Dave collects up patches he deems critical for the -networking subsystem, and then hands them off to Greg. - -There is a patchworks queue that you can see here: - - https://patchwork.kernel.org/bundle/netdev/stable/?state=* - -It contains the patches which Dave has selected, but not yet handed off -to Greg. If Greg already has the patch, then it will be here: - - https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git - -A quick way to find whether the patch is in this stable-queue is to -simply clone the repo, and then git grep the mainline commit ID, e.g. -:: - - stable-queue$ git grep -l 284041ef21fdf2e - releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch - stable/stable-queue$ - -I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say? ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -No, not for networking. Check the stable queues as per above first -to see if it is already queued. If not, then send a mail to netdev, -listing the upstream commit ID and why you think it should be a stable -candidate. - -Before you jump to go do the above, do note that the normal stable rules -in :ref:`Documentation/process/stable-kernel-rules.rst ` -still apply. So you need to explicitly indicate why it is a critical -fix and exactly what users are impacted. In addition, you need to -convince yourself that you *really* think it has been overlooked, -vs. having been considered and rejected. - -Generally speaking, the longer it has had a chance to "soak" in -mainline, the better the odds that it is an OK candidate for stable. So -scrambling to request a commit be added the day after it appears should -be avoided. - -I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say? ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ -No. See above answer. In short, if you think it really belongs in -stable, then ensure you write a decent commit log that describes who -gets impacted by the bug fix and how it manifests itself, and when the -bug was introduced. If you do that properly, then the commit will get -handled appropriately and most likely get put in the patchworks stable -queue if it really warrants it. - -If you think there is some valid information relating to it being in -stable that does *not* belong in the commit log, then use the three dash -marker line as described in -:ref:`Documentation/process/submitting-patches.rst ` -to temporarily embed that information into the patch that you send. - -Are all networking bug fixes backported to all stable releases? +Are there special rules regarding stable submissions on netdev? --------------------------------------------------------------- -Due to capacity, Dave could only take care of the backports for the -last two stable releases. For earlier stable releases, each stable -branch maintainer is supposed to take care of them. If you find any -patch is missing from an earlier stable branch, please notify -stable@vger.kernel.org with either a commit ID or a formal patch -backported, and CC Dave and other relevant networking developers. +While it used to be the case that netdev submissions were not supposed +to carry explicit ``CC: stable@vger.kernel.org`` tags that is no longer +the case today. Please follow the standard stable rules in +:ref:`Documentation/process/stable-kernel-rules.rst `, +and make sure you include appropriate Fixes tags! Is the comment style convention different for the networking content? --------------------------------------------------------------------- diff --git a/Documentation/process/stable-kernel-rules.rst b/Documentation/process/stable-kernel-rules.rst index 3973556250e1..003c865e9c21 100644 --- a/Documentation/process/stable-kernel-rules.rst +++ b/Documentation/process/stable-kernel-rules.rst @@ -35,12 +35,6 @@ Rules on what kind of patches are accepted, and which ones are not, into the Procedure for submitting patches to the -stable tree ---------------------------------------------------- - - If the patch covers files in net/ or drivers/net please follow netdev stable - submission guidelines as described in - :ref:`Documentation/networking/netdev-FAQ.rst ` - after first checking the stable networking queue at - https://patchwork.kernel.org/bundle/netdev/stable/?state=* - to ensure the requested patch is not already queued up. - Security patches should not be handled (solely) by the -stable review process but should follow the procedures in :ref:`Documentation/admin-guide/security-bugs.rst `. diff --git a/Documentation/process/submitting-patches.rst b/Documentation/process/submitting-patches.rst index 5ba54120bef7..5a1b1ea3aed0 100644 --- a/Documentation/process/submitting-patches.rst +++ b/Documentation/process/submitting-patches.rst @@ -250,11 +250,6 @@ should also read :ref:`Documentation/process/stable-kernel-rules.rst ` in addition to this file. -Note, however, that some subsystem maintainers want to come to their own -conclusions on which patches should go to the stable trees. The networking -maintainer, in particular, would rather not see individual developers -adding lines like the above to their patches. - If changes affect userland-kernel interfaces, please send the MAN-PAGES maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at least a notification of the change, so that some information makes its way diff --git a/Documentation/scsi/libsas.rst b/Documentation/scsi/libsas.rst index 7216b5d25800..de422253b0ab 100644 --- a/Documentation/scsi/libsas.rst +++ b/Documentation/scsi/libsas.rst @@ -190,12 +190,10 @@ The event interface:: /* LLDD calls these to notify the class of an event. */ void (*notify_ha_event)(struct sas_ha_struct *, enum ha_event); - void (*notify_port_event)(struct sas_phy *, enum port_event); - void (*notify_phy_event)(struct sas_phy *, enum phy_event); - -When sas_register_ha() returns, those are set and can be -called by the LLDD to notify the SAS layer of such events -the SAS layer. + void sas_notify_port_event(struct sas_phy *, enum port_event); + void sas_notify_phy_event(struct sas_phy *, enum phy_event); + void sas_notify_port_event_gfp(struct sas_phy *, enum port_event, gfp_t); + void sas_notify_phy_event_gfp(struct sas_phy *, enum phy_event, gfp_t); The port notification:: diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index aa0081685ee1..b3ed5c581034 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1040,8 +1040,8 @@ The keyctl syscall functions are: "key" is the ID of the key to be watched. - "queue_fd" is a file descriptor referring to an open "/dev/watch_queue" - which manages the buffer into which notifications will be delivered. + "queue_fd" is a file descriptor referring to an open pipe which + manages the buffer into which notifications will be delivered. "filter" is either NULL to remove a watch or a filter specification to indicate what events are required from the key. diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 99ceb978c8b0..66d38520e65a 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION ioctl() at run-time. +Creation of the VM will fail if the requested IPA size (whether it is +implicit or explicit) is unsupported on the host. + Please note that configuring the IPA size does not affect the capability exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects size of the address translated by the stage2 level (guest physical to @@ -4828,8 +4831,10 @@ If an MSR access is not permitted through the filtering, it generates a allows user space to deflect and potentially handle various MSR accesses into user space. -If a vCPU is in running state while this ioctl is invoked, the vCPU may -experience inconsistent filtering behavior on MSR accesses. +Note, invoking this ioctl with a vCPU is running is inherently racy. However, +KVM does guarantee that vCPUs will see either the previous filter or the new +filter, e.g. MSRs with identical settings in both the old and new filter will +have deterministic behavior. 5. The kvm_run structure diff --git a/MAINTAINERS b/MAINTAINERS index bfc1b86e3e73..09421fc331b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1169,7 +1169,7 @@ M: Joel Fernandes M: Christian Brauner M: Hridya Valsaraju M: Suren Baghdasaryan -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/android/ @@ -8079,7 +8079,6 @@ F: drivers/crypto/hisilicon/sec2/sec_main.c HISILICON STAGING DRIVERS FOR HIKEY 960/970 M: Mauro Carvalho Chehab -L: devel@driverdev.osuosl.org S: Maintained F: drivers/staging/hikey9xx/ @@ -12667,6 +12666,13 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/aia21/ntfs.git F: Documentation/filesystems/ntfs.rst F: fs/ntfs/ +NTFS3 FILESYSTEM +M: Konstantin Komarov +S: Supported +W: http://www.paragon-software.com/ +F: Documentation/filesystems/ntfs3.rst +F: fs/ntfs3/ + NUBUS SUBSYSTEM M: Finn Thain L: linux-m68k@lists.linux-m68k.org @@ -16911,7 +16917,7 @@ F: drivers/staging/vt665?/ STAGING SUBSYSTEM M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-staging@lists.linux.dev S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git F: drivers/staging/ @@ -18993,7 +18999,7 @@ VME SUBSYSTEM M: Martyn Welch M: Manohar Vanga M: Greg Kroah-Hartman -L: devel@driverdev.osuosl.org +L: linux-kernel@vger.kernel.org S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git F: Documentation/driver-api/vme.rst diff --git a/Makefile b/Makefile index de1acaefe87e..2506480bc9b0 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = +EXTRAVERSION = -pf6 NAME = 💕 Valentine's Day Edition 💕 # *DOCUMENTATION* @@ -265,7 +265,8 @@ no-dot-config-targets := $(clean-targets) \ $(version_h) headers headers_% archheaders archscripts \ %asm-generic kernelversion %src-pkg dt_binding_check \ outputmakefile -no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease +no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \ + image_name single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/ config-build := @@ -1044,8 +1045,8 @@ endif # INSTALL_MOD_STRIP export mod_strip_cmd # CONFIG_MODULE_COMPRESS, if defined, will cause module to be compressed -# after they are installed in agreement with CONFIG_MODULE_COMPRESS_GZIP -# or CONFIG_MODULE_COMPRESS_XZ. +# after they are installed in agreement with CONFIG_MODULE_COMPRESS_GZIP, +# CONFIG_MODULE_COMPRESS_XZ, or CONFIG_MODULE_COMPRESS_ZSTD. mod_compress_cmd = true ifdef CONFIG_MODULE_COMPRESS @@ -1055,6 +1056,9 @@ ifdef CONFIG_MODULE_COMPRESS ifdef CONFIG_MODULE_COMPRESS_XZ mod_compress_cmd = $(XZ) -f endif # CONFIG_MODULE_COMPRESS_XZ + ifdef CONFIG_MODULE_COMPRESS_ZSTD + mod_compress_cmd = $(ZSTD) -T0 --rm -f -q + endif # CONFIG_MODULE_COMPRESS_ZSTD endif # CONFIG_MODULE_COMPRESS export mod_compress_cmd @@ -1246,11 +1250,19 @@ define filechk_utsrelease.h endef define filechk_version.h - echo \#define LINUX_VERSION_CODE $(shell \ - expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \ - echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))' + if [ $(SUBLEVEL) -gt 255 ]; then \ + echo \#define LINUX_VERSION_CODE $(shell \ + expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + 255); \ + else \ + echo \#define LINUX_VERSION_CODE $(shell \ + expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \ + fi; \ + echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + \ + ((c) > 255 ? 255 : (c)))' endef +$(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0) +$(version_h): SUBLEVEL := $(if $(SUBLEVEL), $(SUBLEVEL), 0) $(version_h): FORCE $(call filechk,version.h) $(Q)rm -f $(old_version_h) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index d9cce7238a36..73eee41826e2 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -1164,9 +1164,9 @@ __armv4_mmu_cache_off: __armv7_mmu_cache_off: mrc p15, 0, r0, c1, c0 #ifdef CONFIG_MMU - bic r0, r0, #0x000d + bic r0, r0, #0x0005 #else - bic r0, r0, #0x000c + bic r0, r0, #0x0004 #endif mcr p15, 0, r0, c1, c0 @ turn MMU and cache off mov r0, #0 diff --git a/arch/arm/boot/dts/armada-388-helios4.dts b/arch/arm/boot/dts/armada-388-helios4.dts index b3728de3bd3f..ec134e22bae3 100644 --- a/arch/arm/boot/dts/armada-388-helios4.dts +++ b/arch/arm/boot/dts/armada-388-helios4.dts @@ -70,6 +70,9 @@ system-leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&helios_system_led_pins>; + status-led { label = "helios4:green:status"; gpios = <&gpio0 24 GPIO_ACTIVE_LOW>; @@ -86,6 +89,9 @@ io-leds { compatible = "gpio-leds"; + pinctrl-names = "default"; + pinctrl-0 = <&helios_io_led_pins>; + sata1-led { label = "helios4:green:ata1"; gpios = <&gpio1 17 GPIO_ACTIVE_LOW>; @@ -121,11 +127,15 @@ fan1: j10-pwm { compatible = "pwm-fan"; pwms = <&gpio1 9 40000>; /* Target freq:25 kHz */ + pinctrl-names = "default"; + pinctrl-0 = <&helios_fan1_pins>; }; fan2: j17-pwm { compatible = "pwm-fan"; pwms = <&gpio1 23 40000>; /* Target freq:25 kHz */ + pinctrl-names = "default"; + pinctrl-0 = <&helios_fan2_pins>; }; usb2_phy: usb2-phy { @@ -286,16 +296,22 @@ "mpp39", "mpp40"; marvell,function = "sd0"; }; - helios_led_pins: helios-led-pins { - marvell,pins = "mpp24", "mpp25", - "mpp49", "mpp50", + helios_system_led_pins: helios-system-led-pins { + marvell,pins = "mpp24", "mpp25"; + marvell,function = "gpio"; + }; + helios_io_led_pins: helios-io-led-pins { + marvell,pins = "mpp49", "mpp50", "mpp52", "mpp53", "mpp54"; marvell,function = "gpio"; }; - helios_fan_pins: helios-fan-pins { - marvell,pins = "mpp41", "mpp43", - "mpp48", "mpp55"; + helios_fan1_pins: helios_fan1_pins { + marvell,pins = "mpp41", "mpp43"; + marvell,function = "gpio"; + }; + helios_fan2_pins: helios_fan2_pins { + marvell,pins = "mpp48", "mpp55"; marvell,function = "gpio"; }; microsom_spi1_cs_pins: spi1-cs-pins { diff --git a/arch/arm/boot/dts/aspeed-g4.dtsi b/arch/arm/boot/dts/aspeed-g4.dtsi index b3dafbc8caca..e7a45ba18fc9 100644 --- a/arch/arm/boot/dts/aspeed-g4.dtsi +++ b/arch/arm/boot/dts/aspeed-g4.dtsi @@ -375,6 +375,7 @@ compatible = "aspeed,ast2400-lpc-snoop"; reg = <0x10 0x8>; interrupts = <8>; + clocks = <&syscon ASPEED_CLK_GATE_LCLK>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/aspeed-g5.dtsi b/arch/arm/boot/dts/aspeed-g5.dtsi index 5bc0de0f3365..21930521a986 100644 --- a/arch/arm/boot/dts/aspeed-g5.dtsi +++ b/arch/arm/boot/dts/aspeed-g5.dtsi @@ -497,6 +497,7 @@ compatible = "aspeed,ast2500-lpc-snoop"; reg = <0x10 0x8>; interrupts = <8>; + clocks = <&syscon ASPEED_CLK_GATE_LCLK>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/aspeed-g6.dtsi b/arch/arm/boot/dts/aspeed-g6.dtsi index 810b0676ab03..3ee470c2b7b5 100644 --- a/arch/arm/boot/dts/aspeed-g6.dtsi +++ b/arch/arm/boot/dts/aspeed-g6.dtsi @@ -524,6 +524,7 @@ compatible = "aspeed,ast2600-lpc-snoop"; reg = <0x0 0x80>; interrupts = ; + clocks = <&syscon ASPEED_CLK_GATE_LCLK>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index 73b6b1f89de9..775ceb3acb6c 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -334,14 +334,6 @@ }; &pinctrl { - atmel,mux-mask = < - /* A B C */ - 0xFFFFFE7F 0xC0E0397F 0xEF00019D /* pioA */ - 0x03FFFFFF 0x02FC7E68 0x00780000 /* pioB */ - 0xffffffff 0xF83FFFFF 0xB800F3FC /* pioC */ - 0x003FFFFF 0x003F8000 0x00000000 /* pioD */ - >; - adc { pinctrl_adc_default: adc_default { atmel,pins = ; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi index 1b1163858b1d..e3251f3e3eaa 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi +++ b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi @@ -84,8 +84,8 @@ pinctrl-0 = <&pinctrl_macb0_default>; phy-mode = "rmii"; - ethernet-phy@0 { - reg = <0x0>; + ethernet-phy@7 { + reg = <0x7>; interrupt-parent = <&pioA>; interrupts = ; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/exynos3250-artik5.dtsi b/arch/arm/boot/dts/exynos3250-artik5.dtsi index 04290ec4583a..829c05b2c405 100644 --- a/arch/arm/boot/dts/exynos3250-artik5.dtsi +++ b/arch/arm/boot/dts/exynos3250-artik5.dtsi @@ -79,7 +79,7 @@ pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx3>; - interrupts = <5 IRQ_TYPE_NONE>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps14_irq>; reg = <0x66>; diff --git a/arch/arm/boot/dts/exynos3250-monk.dts b/arch/arm/boot/dts/exynos3250-monk.dts index 69451566945d..fae046e08a5d 100644 --- a/arch/arm/boot/dts/exynos3250-monk.dts +++ b/arch/arm/boot/dts/exynos3250-monk.dts @@ -200,7 +200,7 @@ pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos3250-rinato.dts b/arch/arm/boot/dts/exynos3250-rinato.dts index a26e3e582a7e..d64ccf4b7d32 100644 --- a/arch/arm/boot/dts/exynos3250-rinato.dts +++ b/arch/arm/boot/dts/exynos3250-rinato.dts @@ -270,7 +270,7 @@ pmic@66 { compatible = "samsung,s2mps14-pmic"; interrupt-parent = <&gpx0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos5250-spring.dts b/arch/arm/boot/dts/exynos5250-spring.dts index 9d2baea62d0d..fba1462b19df 100644 --- a/arch/arm/boot/dts/exynos5250-spring.dts +++ b/arch/arm/boot/dts/exynos5250-spring.dts @@ -109,7 +109,7 @@ compatible = "samsung,s5m8767-pmic"; reg = <0x66>; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s5m8767_irq &s5m8767_dvs &s5m8767_ds>; wakeup-source; diff --git a/arch/arm/boot/dts/exynos5420-arndale-octa.dts b/arch/arm/boot/dts/exynos5420-arndale-octa.dts index bf457d0c02eb..1aad4859c5f1 100644 --- a/arch/arm/boot/dts/exynos5420-arndale-octa.dts +++ b/arch/arm/boot/dts/exynos5420-arndale-octa.dts @@ -349,7 +349,7 @@ reg = <0x66>; interrupt-parent = <&gpx3>; - interrupts = <2 IRQ_TYPE_EDGE_FALLING>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps11_irq>; diff --git a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi index d0df560eb0db..6d690b1db099 100644 --- a/arch/arm/boot/dts/exynos5422-odroid-core.dtsi +++ b/arch/arm/boot/dts/exynos5422-odroid-core.dtsi @@ -509,7 +509,7 @@ samsung,s2mps11-acokb-ground; interrupt-parent = <&gpx0>; - interrupts = <4 IRQ_TYPE_EDGE_FALLING>; + interrupts = <4 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; pinctrl-0 = <&s2mps11_irq>; diff --git a/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts b/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts index ecbb2cc5b9ab..79cc45728cd2 100644 --- a/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts +++ b/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts @@ -14,5 +14,6 @@ }; &gpmi { + fsl,use-minimum-ecc; status = "okay"; }; diff --git a/arch/arm/boot/dts/omap443x.dtsi b/arch/arm/boot/dts/omap443x.dtsi index cb309743de5d..dd8ef58cbaed 100644 --- a/arch/arm/boot/dts/omap443x.dtsi +++ b/arch/arm/boot/dts/omap443x.dtsi @@ -33,10 +33,12 @@ }; ocp { + /* 4430 has only gpio_86 tshut and no talert interrupt */ bandgap: bandgap@4a002260 { reg = <0x4a002260 0x4 0x4a00232C 0x4>; compatible = "ti,omap4430-bandgap"; + gpios = <&gpio3 22 GPIO_ACTIVE_HIGH>; #thermal-sensor-cells = <0>; }; diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi index 84066c1298df..ec45ced3cde6 100644 --- a/arch/arm/boot/dts/sam9x60.dtsi +++ b/arch/arm/boot/dts/sam9x60.dtsi @@ -606,6 +606,15 @@ compatible = "microchip,sam9x60-pinctrl", "atmel,at91sam9x5-pinctrl", "atmel,at91rm9200-pinctrl", "simple-bus"; ranges = <0xfffff400 0xfffff400 0x800>; + /* mux-mask corresponding to sam9x60 SoC in TFBGA228L package */ + atmel,mux-mask = < + /* A B C */ + 0xffffffff 0xffe03fff 0xef00019d /* pioA */ + 0x03ffffff 0x02fc7e7f 0x00780000 /* pioB */ + 0xffffffff 0xffffffff 0xf83fffff /* pioC */ + 0x003fffff 0x003f8000 0x00000000 /* pioD */ + >; + pioA: gpio@fffff400 { compatible = "microchip,sam9x60-gpio", "atmel,at91sam9x5-gpio", "atmel,at91rm9200-gpio"; reg = <0xfffff400 0x200>; diff --git a/arch/arm/boot/dts/tegra30-ouya.dts b/arch/arm/boot/dts/tegra30-ouya.dts index 74da1360d297..0368b3b816ef 100644 --- a/arch/arm/boot/dts/tegra30-ouya.dts +++ b/arch/arm/boot/dts/tegra30-ouya.dts @@ -4352,8 +4352,8 @@ nvidia,pins = "cam_mclk_pcc0"; nvidia,function = "vi_alt3"; nvidia,pull = ; - nvidia,tristate = ; - nvidia,enable-input = ; + nvidia,tristate = ; + nvidia,enable-input = ; }; pcc1 { nvidia,pins = "pcc1"; diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 0203e545bbc8..075a2e0ed2c1 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -248,6 +248,7 @@ struct oabi_epoll_event { __u64 data; } __attribute__ ((packed,aligned(4))); +#ifdef CONFIG_EPOLL asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, struct oabi_epoll_event __user *event) { @@ -298,6 +299,20 @@ asmlinkage long sys_oabi_epoll_wait(int epfd, kfree(kbuf); return err ? -EFAULT : ret; } +#else +asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, + struct oabi_epoll_event __user *event) +{ + return -EINVAL; +} + +asmlinkage long sys_oabi_epoll_wait(int epfd, + struct oabi_epoll_event __user *events, + int maxevents, int timeout) +{ + return -EINVAL; +} +#endif struct oabi_sembuf { unsigned short sem_num; diff --git a/arch/arm/mach-at91/pm_suspend.S b/arch/arm/mach-at91/pm_suspend.S index 0184de05c1be..b683c2caa40b 100644 --- a/arch/arm/mach-at91/pm_suspend.S +++ b/arch/arm/mach-at91/pm_suspend.S @@ -442,7 +442,7 @@ ENDPROC(at91_backup_mode) str tmp1, [pmc, #AT91_PMC_PLL_UPDT] /* step 2. */ - ldr tmp1, =#AT91_PMC_PLL_ACR_DEFAULT_PLLA + ldr tmp1, =AT91_PMC_PLL_ACR_DEFAULT_PLLA str tmp1, [pmc, #AT91_PMC_PLL_ACR] /* step 3. */ diff --git a/arch/arm/mach-ixp4xx/Kconfig b/arch/arm/mach-ixp4xx/Kconfig index f7211b57b1e7..165c184801e1 100644 --- a/arch/arm/mach-ixp4xx/Kconfig +++ b/arch/arm/mach-ixp4xx/Kconfig @@ -13,7 +13,6 @@ config MACH_IXP4XX_OF select I2C select I2C_IOP3XX select PCI - select TIMER_OF select USE_OF help Say 'Y' here to support Device Tree-based IXP4xx platforms. diff --git a/arch/arm/mach-omap2/sr_device.c b/arch/arm/mach-omap2/sr_device.c index 62df666c2bd0..17b66f0d0dee 100644 --- a/arch/arm/mach-omap2/sr_device.c +++ b/arch/arm/mach-omap2/sr_device.c @@ -88,34 +88,26 @@ static void __init sr_set_nvalues(struct omap_volt_data *volt_data, extern struct omap_sr_data omap_sr_pdata[]; -static int __init sr_dev_init(struct omap_hwmod *oh, void *user) +static int __init sr_init_by_name(const char *name, const char *voltdm) { struct omap_sr_data *sr_data = NULL; struct omap_volt_data *volt_data; - struct omap_smartreflex_dev_attr *sr_dev_attr; static int i; - if (!strncmp(oh->name, "smartreflex_mpu_iva", 20) || - !strncmp(oh->name, "smartreflex_mpu", 16)) + if (!strncmp(name, "smartreflex_mpu_iva", 20) || + !strncmp(name, "smartreflex_mpu", 16)) sr_data = &omap_sr_pdata[OMAP_SR_MPU]; - else if (!strncmp(oh->name, "smartreflex_core", 17)) + else if (!strncmp(name, "smartreflex_core", 17)) sr_data = &omap_sr_pdata[OMAP_SR_CORE]; - else if (!strncmp(oh->name, "smartreflex_iva", 16)) + else if (!strncmp(name, "smartreflex_iva", 16)) sr_data = &omap_sr_pdata[OMAP_SR_IVA]; if (!sr_data) { - pr_err("%s: Unknown instance %s\n", __func__, oh->name); + pr_err("%s: Unknown instance %s\n", __func__, name); return -EINVAL; } - sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr; - if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) { - pr_err("%s: No voltage domain specified for %s. Cannot initialize\n", - __func__, oh->name); - goto exit; - } - - sr_data->name = oh->name; + sr_data->name = name; if (cpu_is_omap343x()) sr_data->ip_type = 1; else @@ -136,10 +128,10 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user) } } - sr_data->voltdm = voltdm_lookup(sr_dev_attr->sensor_voltdm_name); + sr_data->voltdm = voltdm_lookup(voltdm); if (!sr_data->voltdm) { pr_err("%s: Unable to get voltage domain pointer for VDD %s\n", - __func__, sr_dev_attr->sensor_voltdm_name); + __func__, voltdm); goto exit; } @@ -160,6 +152,20 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user) return 0; } +static int __init sr_dev_init(struct omap_hwmod *oh, void *user) +{ + struct omap_smartreflex_dev_attr *sr_dev_attr; + + sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr; + if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) { + pr_err("%s: No voltage domain specified for %s. Cannot initialize\n", + __func__, oh->name); + return 0; + } + + return sr_init_by_name(oh->name, sr_dev_attr->sensor_voltdm_name); +} + /* * API to be called from board files to enable smartreflex * autocompensation at init. @@ -169,7 +175,42 @@ void __init omap_enable_smartreflex_on_init(void) sr_enable_on_init = true; } +static const char * const omap4_sr_instances[] = { + "mpu", + "iva", + "core", +}; + +static const char * const dra7_sr_instances[] = { + "mpu", + "core", +}; + int __init omap_devinit_smartreflex(void) { + const char * const *sr_inst; + int i, nr_sr = 0; + + if (soc_is_omap44xx()) { + sr_inst = omap4_sr_instances; + nr_sr = ARRAY_SIZE(omap4_sr_instances); + + } else if (soc_is_dra7xx()) { + sr_inst = dra7_sr_instances; + nr_sr = ARRAY_SIZE(dra7_sr_instances); + } + + if (nr_sr) { + const char *name, *voltdm; + + for (i = 0; i < nr_sr; i++) { + name = kasprintf(GFP_KERNEL, "smartreflex_%s", sr_inst[i]); + voltdm = sr_inst[i]; + sr_init_by_name(name, voltdm); + } + + return 0; + } + return omap_hwmod_for_each_by_class("smartreflex", sr_dev_init, NULL); } diff --git a/arch/arm/mach-s3c/irq-s3c24xx-fiq.S b/arch/arm/mach-s3c/irq-s3c24xx-fiq.S index b54cbd012241..5d238d9a798e 100644 --- a/arch/arm/mach-s3c/irq-s3c24xx-fiq.S +++ b/arch/arm/mach-s3c/irq-s3c24xx-fiq.S @@ -35,7 +35,6 @@ @ and an offset to the irq acknowledgment word ENTRY(s3c24xx_spi_fiq_rx) -s3c24xx_spi_fix_rx: .word fiq_rx_end - fiq_rx_start .word fiq_rx_irq_ack - fiq_rx_start fiq_rx_start: @@ -49,7 +48,7 @@ fiq_rx_start: strb fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ] subs fiq_rcount, fiq_rcount, #1 - subnes pc, lr, #4 @@ return, still have work to do + subsne pc, lr, #4 @@ return, still have work to do @@ set IRQ controller so that next op will trigger IRQ mov fiq_rtmp, #0 @@ -61,7 +60,6 @@ fiq_rx_irq_ack: fiq_rx_end: ENTRY(s3c24xx_spi_fiq_txrx) -s3c24xx_spi_fiq_txrx: .word fiq_txrx_end - fiq_txrx_start .word fiq_txrx_irq_ack - fiq_txrx_start fiq_txrx_start: @@ -76,7 +74,7 @@ fiq_txrx_start: strb fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ] subs fiq_rcount, fiq_rcount, #1 - subnes pc, lr, #4 @@ return, still have work to do + subsne pc, lr, #4 @@ return, still have work to do mov fiq_rtmp, #0 str fiq_rtmp, [ fiq_rirq, # S3C2410_INTMOD - S3C24XX_VA_IRQ ] @@ -88,7 +86,6 @@ fiq_txrx_irq_ack: fiq_txrx_end: ENTRY(s3c24xx_spi_fiq_tx) -s3c24xx_spi_fix_tx: .word fiq_tx_end - fiq_tx_start .word fiq_tx_irq_ack - fiq_tx_start fiq_tx_start: @@ -101,7 +98,7 @@ fiq_tx_start: strb fiq_rtmp, [ fiq_rspi, # S3C2410_SPTDAT ] subs fiq_rcount, fiq_rcount, #1 - subnes pc, lr, #4 @@ return, still have work to do + subsne pc, lr, #4 @@ return, still have work to do mov fiq_rtmp, #0 str fiq_rtmp, [ fiq_rirq, # S3C2410_INTMOD - S3C24XX_VA_IRQ ] diff --git a/arch/arm/xen/p2m.c b/arch/arm/xen/p2m.c index e52950a43f2e..acb464547a54 100644 --- a/arch/arm/xen/p2m.c +++ b/arch/arm/xen/p2m.c @@ -93,10 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, int i; for (i = 0; i < count; i++) { + struct gnttab_unmap_grant_ref unmap; + int rc; + if (map_ops[i].status) continue; - set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, - map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT); + if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT, + map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap.host_addr = map_ops[i].host_addr, + unmap.handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap.dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap.dev_bus_addr = 0; + + /* + * Pre-populate the status field, to be recognizable in + * the log message below. + */ + unmap.status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + &unmap, 1); + if (rc || unmap.status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st=%d\n", + rc, unmap.status); } return 0; diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f39568b28ec1..e42da99db91f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -522,7 +522,7 @@ config ARM64_ERRATUM_1024718 help This option adds a workaround for ARM Cortex-A55 Erratum 1024718. - Affected Cortex-A55 cores (r0p0, r0p1, r1p0) could cause incorrect + Affected Cortex-A55 cores (all revisions) could cause incorrect update of the hardware dirty bit when the DBM/AP bits are updated without a break-before-make. The workaround is to disable the usage of hardware DBM locally on the affected cores. CPUs not affected by @@ -952,8 +952,9 @@ choice that is selected here. config CPU_BIG_ENDIAN - bool "Build big-endian kernel" - help + bool "Build big-endian kernel" + depends on !LD_IS_LLD || LLD_VERSION >= 130000 + help Say Y if you plan on running a kernel with a big-endian userspace. config CPU_LITTLE_ENDIAN diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts index 896f34fd9fc3..7ae16541d14f 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-pinebook.dts @@ -126,8 +126,6 @@ }; &ehci0 { - phys = <&usbphy 0>; - phy-names = "usb"; status = "okay"; }; @@ -169,6 +167,7 @@ pinctrl-0 = <&mmc2_pins>, <&mmc2_ds_pin>; vmmc-supply = <®_dcdc1>; vqmmc-supply = <®_eldo1>; + max-frequency = <200000000>; bus-width = <8>; non-removable; cap-mmc-hw-reset; @@ -177,8 +176,6 @@ }; &ohci0 { - phys = <&usbphy 0>; - phy-names = "usb"; status = "okay"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi index c48692b06e1f..3402cec87035 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64-sopine.dtsi @@ -32,7 +32,6 @@ pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; vmmc-supply = <®_dcdc1>; - non-removable; disable-wp; bus-width = <4>; cd-gpios = <&pio 5 6 GPIO_ACTIVE_LOW>; /* PF6 */ diff --git a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi index 51cc30e84e26..57786fc120c3 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-a64.dtsi @@ -514,7 +514,7 @@ resets = <&ccu RST_BUS_MMC2>; reset-names = "ahb"; interrupts = ; - max-frequency = <200000000>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -593,6 +593,8 @@ <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>, <&ccu RST_BUS_EHCI0>; + phys = <&usbphy 0>; + phy-names = "usb"; status = "disabled"; }; @@ -603,6 +605,8 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; + phys = <&usbphy 0>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi index 8a62a9fbe347..77765d4a05ec 100644 --- a/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi +++ b/arch/arm64/boot/dts/allwinner/sun50i-h6.dtsi @@ -436,6 +436,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc0_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -452,6 +453,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc1_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -468,6 +470,7 @@ interrupts = ; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; + max-frequency = <150000000>; status = "disabled"; #address-cells = <1>; #size-cells = <0>; @@ -680,6 +683,8 @@ <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>, <&ccu RST_BUS_EHCI0>; + phys = <&usb2phy 0>; + phy-names = "usb"; status = "disabled"; }; @@ -690,6 +695,8 @@ clocks = <&ccu CLK_BUS_OHCI0>, <&ccu CLK_USB_OHCI0>; resets = <&ccu RST_BUS_OHCI0>; + phys = <&usb2phy 0>; + phy-names = "usb"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts index 4b517ca72059..06de0b1ce726 100644 --- a/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts +++ b/arch/arm64/boot/dts/amlogic/meson-sm1-khadas-vim3l.dts @@ -89,13 +89,12 @@ status = "okay"; }; -&sd_emmc_a { - sd-uhs-sdr50; -}; - &usb { phys = <&usb2_phy0>, <&usb2_phy1>; phy-names = "usb2-phy0", "usb2-phy1"; }; */ +&sd_emmc_a { + sd-uhs-sdr50; +}; diff --git a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi index f873dc44ce9c..55d9b56ac749 100644 --- a/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi +++ b/arch/arm64/boot/dts/broadcom/bcm4908/bcm4908.dtsi @@ -164,7 +164,7 @@ nand@1800 { #address-cells = <1>; #size-cells = <0>; - compatible = "brcm,brcmnand-v7.1", "brcm,brcmnand"; + compatible = "brcm,nand-bcm63138", "brcm,brcmnand-v7.1", "brcm,brcmnand"; reg = <0x1800 0x600>, <0x2000 0x10>; reg-names = "nand", "nand-int-base"; interrupts = ; diff --git a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi index 03486a8ffc67..4c5106a0860d 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433-tm2-common.dtsi @@ -388,7 +388,7 @@ pmic@66 { compatible = "samsung,s2mps13-pmic"; interrupt-parent = <&gpa0>; - interrupts = <7 IRQ_TYPE_NONE>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; reg = <0x66>; samsung,s2mps11-wrstbi-ground; diff --git a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts index 695d4c140646..125c03f351d9 100644 --- a/arch/arm64/boot/dts/exynos/exynos7-espresso.dts +++ b/arch/arm64/boot/dts/exynos/exynos7-espresso.dts @@ -90,7 +90,7 @@ pmic@66 { compatible = "samsung,s2mps15-pmic"; reg = <0x66>; - interrupts = <2 IRQ_TYPE_NONE>; + interrupts = <2 IRQ_TYPE_LEVEL_LOW>; interrupt-parent = <&gpa0>; pinctrl-names = "default"; pinctrl-0 = <&pmic_irq>; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi index 626b709d1fb9..03de3a34276a 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi @@ -192,6 +192,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = ; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index bbae4b353d3f..c3c34e519e90 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -322,6 +322,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = <0 75 0x4>; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 565934cbfa28..719451ee09d0 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -325,6 +325,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = ; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi index e1c0fcba5c20..07c099b4ed5b 100644 --- a/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi +++ b/arch/arm64/boot/dts/intel/socfpga_agilex.dtsi @@ -166,7 +166,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 2>; - altr,sysmgr-syscon = <&sysmgr 0x48 8>; + altr,sysmgr-syscon = <&sysmgr 0x48 0>; clocks = <&clkmgr AGILEX_EMAC1_CLK>, <&clkmgr AGILEX_EMAC_PTP_CLK>; clock-names = "stmmaceth", "ptp_ref"; status = "disabled"; @@ -184,7 +184,7 @@ rx-fifo-depth = <16384>; snps,multicast-filter-bins = <256>; iommus = <&smmu 3>; - altr,sysmgr-syscon = <&sysmgr 0x4c 16>; + altr,sysmgr-syscon = <&sysmgr 0x4c 0>; clocks = <&clkmgr AGILEX_EMAC2_CLK>, <&clkmgr AGILEX_EMAC_PTP_CLK>; clock-names = "stmmaceth", "ptp_ref"; status = "disabled"; diff --git a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts index f5ec3b644769..d239ab70ed99 100644 --- a/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts +++ b/arch/arm64/boot/dts/marvell/armada-3720-turris-mox.dts @@ -205,7 +205,7 @@ }; partition@20000 { - label = "u-boot"; + label = "a53-firmware"; reg = <0x20000 0x160000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index 5b9ec032ce8d..7c6d871538a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -698,6 +698,8 @@ clocks = <&pericfg CLK_PERI_MSDC30_1_PD>, <&topckgen CLK_TOP_AXI_SEL>; clock-names = "source", "hclk"; + resets = <&pericfg MT7622_PERI_MSDC1_SW_RST>; + reset-names = "hrst"; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5b782a4769e7..36a90dd2fa7c 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include @@ -661,6 +661,7 @@ compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; interrupts = ; + power-domains = <&spm MT8183_POWER_DOMAIN_DISP>; #pwm-cells = <2>; clocks = <&topckgen CLK_TOP_MUX_DISP_PWM>, <&infracfg CLK_INFRA_DISP_PWM>; @@ -1011,7 +1012,7 @@ clocks = <&mmsys CLK_MM_DISP_RDMA0>; iommus = <&iommu M4U_PORT_DISP_RDMA0>; mediatek,larb = <&larb0>; - mediatek,rdma_fifo_size = <5120>; + mediatek,rdma-fifo-size = <5120>; mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xb000 0x1000>; }; @@ -1023,7 +1024,7 @@ clocks = <&mmsys CLK_MM_DISP_RDMA1>; iommus = <&iommu M4U_PORT_DISP_RDMA1>; mediatek,larb = <&larb0>; - mediatek,rdma_fifo_size = <2048>; + mediatek,rdma-fifo-size = <2048>; mediatek,gce-client-reg = <&gce SUBSYS_1400XXXX 0xc000 0x1000>; }; @@ -1055,8 +1056,7 @@ }; gamma0: gamma@14011000 { - compatible = "mediatek,mt8183-disp-gamma", - "mediatek,mt8173-disp-gamma"; + compatible = "mediatek,mt8183-disp-gamma"; reg = <0 0x14011000 0 0x1000>; interrupts = ; power-domains = <&spm MT8183_POWER_DOMAIN_DISP>; diff --git a/arch/arm64/boot/dts/nvidia/tegra210.dtsi b/arch/arm64/boot/dts/nvidia/tegra210.dtsi index 4fbf8c15b0a1..fd33b4d28ef3 100644 --- a/arch/arm64/boot/dts/nvidia/tegra210.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra210.dtsi @@ -997,6 +997,7 @@ <&tegra_car 128>, /* hda2hdmi */ <&tegra_car 111>; /* hda2codec_2x */ reset-names = "hda", "hda2hdmi", "hda2codec_2x"; + power-domains = <&pd_sor>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi index f91269492d72..f1af798abd74 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-a2015-common.dtsi @@ -106,6 +106,9 @@ interrupt-parent = <&msmgpio>; interrupts = <115 IRQ_TYPE_EDGE_RISING>; + vdd-supply = <&pm8916_l17>; + vddio-supply = <&pm8916_l5>; + pinctrl-names = "default"; pinctrl-0 = <&accel_int_default>; }; @@ -113,6 +116,9 @@ magnetometer@12 { compatible = "bosch,bmc150_magn"; reg = <0x12>; + + vdd-supply = <&pm8916_l17>; + vddio-supply = <&pm8916_l5>; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts b/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts index e39c04d977c2..dd35c3344358 100644 --- a/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts +++ b/arch/arm64/boot/dts/qcom/msm8916-samsung-a5u-eur.dts @@ -38,7 +38,7 @@ &pronto { iris { - compatible = "qcom,wcn3680"; + compatible = "qcom,wcn3660b"; }; }; diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi b/arch/arm64/boot/dts/qcom/msm8916.dtsi index 402e891a84ab..d25f6dc751e9 100644 --- a/arch/arm64/boot/dts/qcom/msm8916.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi @@ -56,7 +56,7 @@ no-map; }; - reserved@8668000 { + reserved@86680000 { reg = <0x0 0x86680000 0x0 0x80000>; no-map; }; @@ -69,7 +69,7 @@ qcom,client-id = <1>; }; - rfsa@867e00000 { + rfsa@867e0000 { reg = <0x0 0x867e0000 0x0 0x20000>; no-map; }; diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts index ce22d4fa383e..f13a63ca8efd 100644 --- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts +++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts @@ -122,7 +122,7 @@ &apps_rsc { pm8009-rpmh-regulators { - compatible = "qcom,pm8009-rpmh-regulators"; + compatible = "qcom,pm8009-1-rpmh-regulators"; qcom,pmic-id = "f"; vdd-s1-supply = <&vph_pwr>; @@ -131,6 +131,13 @@ vdd-l5-l6-supply = <&vreg_bob>; vdd-l7-supply = <&vreg_s4a_1p8>; + vreg_s2f_0p95: smps2 { + regulator-name = "vreg_s2f_0p95"; + regulator-min-microvolt = <900000>; + regulator-max-microvolt = <952000>; + regulator-initial-mode = ; + }; + vreg_l1f_1p1: ldo1 { regulator-name = "vreg_l1f_1p1"; regulator-min-microvolt = <1104000>; @@ -491,8 +498,6 @@ vqmmc-supply = <&vreg_l6c_2p96>; cd-gpios = <&tlmm 77 GPIO_ACTIVE_LOW>; bus-width = <4>; - /* there seem to be issues with HS400-1.8V mode, so disable it */ - no-1-8-v; no-sdio; no-emmc; }; @@ -706,13 +711,13 @@ cmd { pins = "sdc2_cmd"; bias-pull-up; - drive-strength = <16>; + drive-strength = <10>; }; data { pins = "sdc2_data"; bias-pull-up; - drive-strength = <16>; + drive-strength = <10>; }; }; diff --git a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts index c0b93813ea9a..c4ac6f5dc008 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-db845c.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-db845c.dts @@ -1114,11 +1114,11 @@ reg = <0x10>; // CAM0_RST_N - reset-gpios = <&tlmm 9 0>; + reset-gpios = <&tlmm 9 GPIO_ACTIVE_LOW>; pinctrl-names = "default"; pinctrl-0 = <&cam0_default>; gpios = <&tlmm 13 0>, - <&tlmm 9 0>; + <&tlmm 9 GPIO_ACTIVE_LOW>; clocks = <&clock_camcc CAM_CC_MCLK0_CLK>; clock-names = "xvclk"; diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 65acd1f381eb..1ae90e8b70f3 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -1657,7 +1657,7 @@ clocks = <&gcc GCC_SDCC2_AHB_CLK>, <&gcc GCC_SDCC2_APPS_CLK>, - <&xo_board>; + <&rpmhcc RPMH_CXO_CLK>; clock-names = "iface", "core", "xo"; iommus = <&apps_smmu 0x4a0 0x0>; qcom,dll-config = <0x0007642c>; diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi index e66b5b36e489..759734b7715b 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-baseboard.dtsi @@ -150,7 +150,7 @@ regulator-name = "audio-1.8V"; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; - gpio = <&gpio_exp2 7 GPIO_ACTIVE_HIGH>; + gpio = <&gpio_exp4 1 GPIO_ACTIVE_HIGH>; enable-active-high; }; diff --git a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi index 8ac167aa18f0..ea937a926c0e 100644 --- a/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi +++ b/arch/arm64/boot/dts/renesas/beacon-renesom-som.dtsi @@ -89,7 +89,6 @@ pinctrl-names = "default"; uart-has-rtscts; status = "okay"; - max-speed = <4000000>; bluetooth { compatible = "brcm,bcm43438-bt"; @@ -98,6 +97,7 @@ device-wakeup-gpios = <&pca9654 5 GPIO_ACTIVE_HIGH>; clocks = <&osc_32k>; clock-names = "extclk"; + max-speed = <4000000>; }; }; @@ -148,7 +148,7 @@ }; eeprom@50 { - compatible = "microchip,at24c64", "atmel,24c64"; + compatible = "microchip,24c64", "atmel,24c64"; pagesize = <32>; read-only; /* Manufacturing EEPROM programmed at factory */ reg = <0x50>; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index db0d5c8e5f96..93c734d8a46c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -928,6 +928,7 @@ phy-mode = "rmii"; phy-handle = <&phy>; snps,txpbl = <0x4>; + clock_in_out = "output"; status = "disabled"; mdio { diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 34b8a89197be..cafb5b96be0e 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -55,7 +55,7 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #define aes_mac_update neon_aes_mac_update MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON"); #endif -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) MODULE_ALIAS_CRYPTO("ecb(aes)"); MODULE_ALIAS_CRYPTO("cbc(aes)"); MODULE_ALIAS_CRYPTO("ctr(aes)"); @@ -650,7 +650,7 @@ static int __maybe_unused xts_decrypt(struct skcipher_request *req) } static struct skcipher_alg aes_algs[] = { { -#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS) +#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS) .base = { .cra_name = "__ecb(aes)", .cra_driver_name = "__ecb-aes-" MODE, diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index c93121bcfdeb..c1362861765f 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -19,6 +19,7 @@ MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha1"); struct sha1_ce_state { struct sha1_state sst; diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 31ba3da5e61b..ded3a6488f81 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -19,6 +19,8 @@ MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha224"); +MODULE_ALIAS_CRYPTO("sha256"); struct sha256_ce_state { struct sha256_state sst; diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index e5a2936f0886..7288d3046354 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -23,6 +23,10 @@ MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha3-224"); +MODULE_ALIAS_CRYPTO("sha3-256"); +MODULE_ALIAS_CRYPTO("sha3-384"); +MODULE_ALIAS_CRYPTO("sha3-512"); asmlinkage void sha3_ce_transform(u64 *st, const u8 *data, int blocks, int md_len); diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index faa83f6cf376..a6b1adf31c56 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -23,6 +23,8 @@ MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("sha384"); +MODULE_ALIAS_CRYPTO("sha512"); asmlinkage void sha512_ce_transform(struct sha512_state *sst, u8 const *src, int blocks); diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index a7f5a1bbc8ac..f97e4a459561 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -111,7 +111,7 @@ .endm /* Virtual CPU ID registers */ -.macro __init_el2_nvhe_idregs +.macro __init_el2_idregs mrs x0, midr_el1 mrs x1, mpidr_el1 msr vpidr_el2, x0 @@ -163,6 +163,7 @@ __init_el2_stage2 __init_el2_gicv3 __init_el2_hstr + __init_el2_idregs /* * When VHE is not in use, early init of EL2 needs to be done here. @@ -171,7 +172,6 @@ * will be done via the _EL1 system register aliases in __cpu_setup. */ .ifeqs "\mode", "nvhe" - __init_el2_nvhe_idregs __init_el2_nvhe_cptr __init_el2_nvhe_sve __init_el2_nvhe_prepare_eret diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 8a33d83ea843..3398477c891d 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -47,7 +47,7 @@ #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 +#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 @@ -183,10 +183,10 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) extern void __kvm_flush_vm_context(void); +extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, int level); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); -extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_timer_set_cntvoff(u64 cntvoff); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index c0450828378b..32ae676236b6 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu); +#ifdef __KVM_NVHE_HYPERVISOR__ +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu); +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu); +#endif + void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); @@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); void __noreturn hyp_panic(void); #ifdef __KVM_NVHE_HYPERVISOR__ -void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); +void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + u64 elr, u64 par); #endif #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ff4732785c32..63b6ef2cfb52 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -315,6 +315,11 @@ static inline void *phys_to_virt(phys_addr_t x) #define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET) #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL) +#define page_to_virt(x) ({ \ + __typeof__(x) __page = x; \ + void *__addr = __va(page_to_phys(__page)); \ + (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\ +}) #define virt_to_page(x) pfn_to_page(virt_to_pfn(x)) #else #define page_to_virt(x) ({ \ diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 0b3079fd28eb..1c364ec0ad31 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -65,10 +65,7 @@ extern u64 idmap_ptrs_per_pgd; static inline bool __cpu_uses_extended_idmap(void) { - if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52)) - return false; - - return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS)); + return unlikely(idmap_t0sz != TCR_T0SZ(vabits_actual)); } /* diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index 691f15af788e..810045628c66 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,7 +1,7 @@ #ifdef CONFIG_ARM64_MODULE_PLTS SECTIONS { - .plt (NOLOAD) : { BYTE(0) } - .init.plt (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline (NOLOAD) : { BYTE(0) } + .plt 0 (NOLOAD) : { BYTE(0) } + .init.plt 0 (NOLOAD) : { BYTE(0) } + .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } } #endif diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 046be789fbb4..9a65fb528110 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings; #define _PAGE_DEFAULT (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL)) #define PAGE_KERNEL __pgprot(PROT_NORMAL) -#define PAGE_KERNEL_TAGGED __pgprot(PROT_NORMAL_TAGGED) #define PAGE_KERNEL_RO __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY) #define PAGE_KERNEL_ROX __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY) #define PAGE_KERNEL_EXEC __pgprot(PROT_NORMAL & ~PTE_PXN) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 501562793ce2..a5215d16a0f4 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN) #define pgprot_device(prot) \ __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN) +#define pgprot_tagged(prot) \ + __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED)) +#define pgprot_mhp pgprot_tagged /* * DMA allocations for non-coherent devices use what the Arm architecture calls * "Normal non-cacheable" memory, which permits speculation, unaligned accesses diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3e6331b64932..33b6f56dcb21 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1455,7 +1455,7 @@ static bool cpu_has_broken_dbm(void) /* List of CPUs which have broken DBM support. */ static const struct midr_range cpus[] = { #ifdef CONFIG_ARM64_ERRATUM_1024718 - MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 1, 0), // A55 r0p0 -r1p0 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A55), /* Kryo4xx Silver (rdpe => r1p0) */ MIDR_REV(MIDR_QCOM_KRYO_4XX_SILVER, 0xd, 0xe), #endif diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index e6e284265f19..58303a9ec32c 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -64,5 +64,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + *ppos += count; + return count; } diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a0dc987724ed..a0b3bfe67609 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables) */ adrp x5, __idmap_text_end clz x5, x5 - cmp x5, TCR_T0SZ(VA_BITS) // default T0SZ small enough? + cmp x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough? b.ge 1f // .. then skip VA range extension adr_l x6, idmap_t0sz @@ -882,6 +882,7 @@ SYM_FUNC_START_LOCAL(__primary_switch) tlbi vmalle1 // Remove any stale TLB entries dsb nsh + isb msr sctlr_el1, x19 // re-enable the MMU isb diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 03210f644790..0cde47a63beb 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -182,8 +182,10 @@ static int create_dtb(struct kimage *image, /* duplicate a device tree blob */ ret = fdt_open_into(initial_boot_params, buf, buf_size); - if (ret) + if (ret) { + vfree(buf); return -EINVAL; + } ret = setup_dtb(image, initrd_load_addr, initrd_len, cmdline, buf); diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 3605f77ad4df..11852e05ee32 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx) return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx)); } -static inline u32 armv8pmu_read_evcntr(int idx) +static inline u64 armv8pmu_read_evcntr(int idx) { u32 counter = ARMV8_IDX_TO_COUNTER(idx); diff --git a/arch/arm64/kernel/probes/uprobes.c b/arch/arm64/kernel/probes/uprobes.c index a412d8edbcd2..2c247634552b 100644 --- a/arch/arm64/kernel/probes/uprobes.c +++ b/arch/arm64/kernel/probes/uprobes.c @@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, /* TODO: Currently we do not support AARCH32 instruction probing */ if (mm->context.flags & MMCF_AARCH32) - return -ENOTSUPP; + return -EOPNOTSUPP; else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE)) return -EINVAL; diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8ac487c84e37..1d75471979cb 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1796,7 +1796,7 @@ int syscall_trace_enter(struct pt_regs *regs) if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) { tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); - if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU)) + if (flags & _TIF_SYSCALL_EMU) return NO_SYSCALL; } diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index fa56af1a59c3..dbce0dcf4cc0 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -199,8 +199,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task, + struct pt_regs *regs) { struct stackframe frame; @@ -208,8 +209,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, start_backtrace(&frame, regs->regs[29], regs->pc); else if (task == current) start_backtrace(&frame, - (unsigned long)__builtin_frame_address(0), - (unsigned long)arch_stack_walk); + (unsigned long)__builtin_frame_address(1), + (unsigned long)__builtin_return_address(0)); else start_backtrace(&frame, thread_saved_fp(task), thread_saved_pc(task)); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index a67b37a7a47e..d7564891ffe1 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) if (!ret) ret = -EOPNOTSUPP; } else { - __cpu_suspend_exit(); + RCU_NONIDLE(__cpu_suspend_exit()); } unpause_graph_tracing(); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe60d25c000e..b25b4c19feeb 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) last_ran = this_cpu_ptr(mmu->last_vcpu_ran); /* + * We guarantee that both TLBs and I-cache are private to each + * vcpu. If detecting that a vcpu from the same VM has + * previously run on the same physical CPU, call into the + * hypervisor code to nuke the relevant contexts. + * * We might get preempted before the vCPU actually runs, but * over-invalidation doesn't affect correctness. */ if (*last_ran != vcpu->vcpu_id) { - kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); + kvm_call_hyp(__kvm_flush_cpu_context, mmu); *last_ran = vcpu->vcpu_id; } diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index b0afad7a99c6..0c66a1d408fd 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -146,7 +146,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) // Now restore the hyp regs restore_callee_saved_regs x2 - set_loaded_vcpu xzr, x1, x2 + set_loaded_vcpu xzr, x2, x3 alternative_if ARM64_HAS_RAS_EXTN // If we have the RAS extensions we can consume a pending error diff --git a/arch/arm64/kvm/hyp/nvhe/debug-sr.c b/arch/arm64/kvm/hyp/nvhe/debug-sr.c index 91a711aa8382..f401724f12ef 100644 --- a/arch/arm64/kvm/hyp/nvhe/debug-sr.c +++ b/arch/arm64/kvm/hyp/nvhe/debug-sr.c @@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1) write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); } -void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu) { /* Disable and flush SPE data generation */ __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_guest(struct kvm_vcpu *vcpu) +{ __debug_switch_to_guest_common(vcpu); } -void __debug_switch_to_host(struct kvm_vcpu *vcpu) +void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu) { __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); +} + +void __debug_switch_to_host(struct kvm_vcpu *vcpu) +{ __debug_switch_to_host_common(vcpu); } diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index a820dfdc9c25..3a06085aab6f 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -71,10 +71,15 @@ SYM_FUNC_START(__host_enter) SYM_FUNC_END(__host_enter) /* - * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); + * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr, + * u64 elr, u64 par); */ SYM_FUNC_START(__hyp_do_panic) - /* Load the format arguments into x1-7 */ + mov x29, x0 + + /* Load the format string into x0 and arguments into x1-7 */ + ldr x0, =__hyp_panic_string + mov x6, x3 get_vcpu_ptr x7, x3 @@ -89,13 +94,8 @@ SYM_FUNC_START(__hyp_do_panic) ldr lr, =panic msr elr_el2, lr - /* - * Set the panic format string and enter the host, conditionally - * restoring the host context. - */ - cmp x0, xzr - ldr x0, =__hyp_panic_string - b.eq __host_enter_without_restoring + /* Enter the host, conditionally restoring the host context. */ + cbz x29, __host_enter_without_restoring b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) @@ -150,7 +150,7 @@ SYM_FUNC_END(__hyp_do_panic) .macro invalid_host_el1_vect .align 7 - mov x0, xzr /* restore_host = false */ + mov x0, xzr /* host_ctxt = NULL */ mrs x1, spsr_el2 mrs x2, elr_el2 mrs x3, par_el1 diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index a906f9e2ff34..1b8ef37bf805 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) __kvm_tlb_flush_vmid(kern_hyp_va(mmu)); } -static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) +static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); - __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); + __kvm_flush_cpu_context(kern_hyp_va(mmu)); } static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) @@ -115,7 +115,7 @@ static const hcall_t *host_hcall[] = { HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), - HANDLE_FUNC(__kvm_tlb_flush_local_vmid), + HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_enable_ssbs), HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f3d0e9eca56c..68ab6b4d5141 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); __sysreg_save_state_nvhe(host_ctxt); + /* + * We must flush and disable the SPE buffer for nVHE, as + * the translation regime(EL1&0) is going to be loaded with + * that of the guest. And we must do this before we change the + * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and + * before we load guest Stage1. + */ + __debug_save_host_buffers_nvhe(vcpu); __adjust_pc(vcpu); @@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) __fpsimd_save_fpexc32(vcpu); + __debug_switch_to_host(vcpu); /* * This must come after restoring the host sysregs, since a non-VHE * system may enable SPE here and make use of the TTBRs. */ - __debug_switch_to_host(vcpu); + __debug_restore_host_buffers_nvhe(vcpu); if (pmu_switch_needed) __pmu_switch_to_host(host_ctxt); @@ -257,7 +266,6 @@ void __noreturn hyp_panic(void) u64 spsr = read_sysreg_el2(SYS_SPSR); u64 elr = read_sysreg_el2(SYS_ELR); u64 par = read_sysreg_par(); - bool restore_host = true; struct kvm_cpu_context *host_ctxt; struct kvm_vcpu *vcpu; @@ -271,7 +279,7 @@ void __noreturn hyp_panic(void) __sysreg_restore_state_nvhe(host_ctxt); } - __hyp_do_panic(restore_host, spsr, elr, par); + __hyp_do_panic(host_ctxt, spsr, elr, par); unreachable(); } diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index fbde89a2c6e8..229b06748c20 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index bdf8e55ed308..4d99d07c610c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -225,6 +225,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; if (!table) { + data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level)); data->addr += kvm_granule_size(level); goto out; } diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index fd7895945bbc..66f17349f0c3 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_host(&cxt); } -void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) +void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu) { struct tlb_inv_context cxt; @@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) __tlb_switch_to_guest(mmu, &cxt); __tlbi(vmalle1); + asm volatile("ic iallu"); dsb(nsh); isb(); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 7d2257cc5438..eebde5eb6c3d 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1309,8 +1309,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, * Prevent userspace from creating a memory region outside of the IPA * space addressable by the KVM guest IPA space. */ - if (memslot->base_gfn + memslot->npages >= - (kvm_phys_size(kvm) >> PAGE_SHIFT)) + if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT)) return -EFAULT; mmap_read_lock(current->mm); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 47f3f035f3ea..9d3d09a89894 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -324,10 +324,9 @@ int kvm_set_ipa_limit(void) } kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); - WARN(kvm_ipa_limit < KVM_PHYS_SHIFT, - "KVM IPA Size Limit (%d bits) is smaller than default size\n", - kvm_ipa_limit); - kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit); + kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit, + ((kvm_ipa_limit < KVM_PHYS_SHIFT) ? + " (Reduced IPA size, limited VM/VMM compatibility)" : "")); return 0; } @@ -356,6 +355,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) return -EINVAL; } else { phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } } mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 709d98fea90c..1141075e4d53 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -230,6 +230,18 @@ int pfn_valid(unsigned long pfn) if (!valid_section(__pfn_to_section(pfn))) return 0; + + /* + * ZONE_DEVICE memory does not have the memblock entries. + * memblock_is_map_memory() check for ZONE_DEVICE based + * addresses will always fail. Even the normal hotplugged + * memory will never have MEMBLOCK_NOMAP flag set in their + * memblock entries. Skip memblock search for all non early + * memory sections covering all of hotplug memory including + * both normal and ZONE_DEVICE based. + */ + if (!early_section(__pfn_to_section(pfn))) + return pfn_section_valid(__pfn_to_section(pfn), pfn); #endif return memblock_is_map_memory(addr); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index ae0c3d023824..6f0648777d34 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -40,7 +40,7 @@ #define NO_BLOCK_MAPPINGS BIT(0) #define NO_CONT_MAPPINGS BIT(1) -u64 idmap_t0sz = TCR_T0SZ(VA_BITS); +u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN); u64 idmap_ptrs_per_pgd = PTRS_PER_PGD; u64 __section(".mmuoff.data.write") vabits_actual; @@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp) * if MTE is present. Otherwise, it has the same attributes as * PAGE_KERNEL. */ - __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags); + __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL), + flags); } /* diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index d822144906ac..a4cf2e2ac15a 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -83,7 +83,7 @@ static int gpr_get(struct task_struct *target, /* Abiv1 regs->tls is fake and we need sync here. */ regs->tls = task_thread_info(target)->tp_value; - return membuf_write(&to, regs, sizeof(regs)); + return membuf_write(&to, regs, sizeof(*regs)); } static int gpr_set(struct task_struct *target, diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h index 6c6f16e409a8..0d23c0049301 100644 --- a/arch/ia64/include/asm/syscall.h +++ b/arch/ia64/include/asm/syscall.h @@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task, static inline long syscall_get_error(struct task_struct *task, struct pt_regs *regs) { - return regs->r10 == -1 ? regs->r8:0; + return regs->r10 == -1 ? -regs->r8:0; } static inline long syscall_get_return_value(struct task_struct *task, diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index c3490ee2daa5..e14f5653393a 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data) { struct syscall_get_set_args *args = data; struct pt_regs *pt = args->regs; - unsigned long *krbs, cfm, ndirty; + unsigned long *krbs, cfm, ndirty, nlocals, nouts; int i, count; if (unw_unwind_to_user(info) < 0) return; + /* + * We get here via a few paths: + * - break instruction: cfm is shared with caller. + * syscall args are in out= regs, locals are non-empty. + * - epsinstruction: cfm is set by br.call + * locals don't exist. + * + * For both cases argguments are reachable in cfm.sof - cfm.sol. + * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ] + */ cfm = pt->cr_ifs; + nlocals = (cfm >> 7) & 0x7f; /* aka sol */ + nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); count = 0; if (in_syscall(pt)) - count = min_t(int, args->n, cfm & 0x7f); + count = min_t(int, args->n, nouts); + /* Iterate over outs. */ for (i = 0; i < count; i++) { + int j = ndirty + nlocals + i + args->i; if (args->rw) - *ia64_rse_skip_regs(krbs, ndirty + i + args->i) = - args->args[i]; + *ia64_rse_skip_regs(krbs, j) = args->args[i]; else - args->args[i] = *ia64_rse_skip_regs(krbs, - ndirty + i + args->i); + args->args[i] = *ia64_rse_skip_regs(krbs, j); } if (!args->rw) { diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index e67b22fc3c60..c1b299760bf7 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall) * need to push through a forced SIGSEGV. */ while (1) { - get_signal(&ksig); + if (!get_signal(&ksig)) + break; /* * get_signal() may have run a debugger (via notify_parent()) diff --git a/arch/mips/Makefile b/arch/mips/Makefile index cd4343edeb11..5ffdd67093bc 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -136,6 +136,25 @@ cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ # cflags-y += -fno-stack-check +# binutils from v2.35 when built with --enable-mips-fix-loongson3-llsc=yes, +# supports an -mfix-loongson3-llsc flag which emits a sync prior to each ll +# instruction to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h +# for a description). +# +# We disable this in order to prevent the assembler meddling with the +# instruction that labels refer to, ie. if we label an ll instruction: +# +# 1: ll v0, 0(a0) +# +# ...then with the assembler fix applied the label may actually point at a sync +# instruction inserted by the assembler, and if we were using the label in an +# exception table the table would no longer contain the address of the ll +# instruction. +# +# Avoid this by explicitly disabling that assembler behaviour. +# +cflags-y += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) + # # CPU-dependent compiler/assembler options for optimization. # diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 47cd9dc7454a..f93f72bcba97 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -37,6 +37,7 @@ KBUILD_AFLAGS := $(KBUILD_AFLAGS) -D__ASSEMBLY__ \ # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n GCOV_PROFILE := n +UBSAN_SANITIZE := n # decompressor objects (linked with vmlinuz) vmlinuzobjs-y := $(obj)/head.o $(obj)/decompress.o $(obj)/string.o diff --git a/arch/mips/cavium-octeon/setup.c b/arch/mips/cavium-octeon/setup.c index 982826ba0ef7..ce4e2806159b 100644 --- a/arch/mips/cavium-octeon/setup.c +++ b/arch/mips/cavium-octeon/setup.c @@ -1149,12 +1149,15 @@ void __init device_tree_init(void) bool do_prune; bool fill_mac; - if (fw_passed_dtb) { - fdt = (void *)fw_passed_dtb; +#ifdef CONFIG_MIPS_ELF_APPENDED_DTB + if (!fdt_check_header(&__appended_dtb)) { + fdt = &__appended_dtb; do_prune = false; fill_mac = true; pr_info("Using appended Device Tree.\n"); - } else if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) { + } else +#endif + if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) { fdt = phys_to_virt(octeon_bootinfo->fdt_addr); if (fdt_check_header(fdt)) panic("Corrupt Device Tree passed to kernel."); diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 8e1deaf00e0c..5e4105cccf9f 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o poly1305-mips-y := poly1305-core.o poly1305-glue.o -perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 -perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 +perlasm-flavour-$(CONFIG_32BIT) := o32 +perlasm-flavour-$(CONFIG_64BIT) := 64 quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index 3682d1a0bb80..ea4b62ece336 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -20,10 +20,27 @@ #include #include +#ifndef __VDSO__ +/* + * Emit CFI data in .debug_frame sections, not .eh_frame sections. + * We don't do DWARF unwinding at runtime, so only the offline DWARF + * information is useful to anyone. Note we should change this if we + * ever decide to enable DWARF unwinding at runtime. + */ +#define CFI_SECTIONS .cfi_sections .debug_frame +#else + /* + * For the vDSO, emit both runtime unwind information and debug + * symbols for the .dbg file. + */ +#define CFI_SECTIONS +#endif + /* * LEAF - declare leaf routine */ #define LEAF(symbol) \ + CFI_SECTIONS; \ .globl symbol; \ .align 2; \ .type symbol, @function; \ @@ -36,6 +53,7 @@ symbol: .frame sp, 0, ra; \ * NESTED - declare nested routine entry point */ #define NESTED(symbol, framesize, rpc) \ + CFI_SECTIONS; \ .globl symbol; \ .align 2; \ .type symbol, @function; \ diff --git a/arch/mips/include/asm/atomic.h b/arch/mips/include/asm/atomic.h index f904084fcb1f..27ad76791539 100644 --- a/arch/mips/include/asm/atomic.h +++ b/arch/mips/include/asm/atomic.h @@ -248,7 +248,7 @@ static __inline__ int pfx##_sub_if_positive(type i, pfx##_t * v) \ * bltz that can branch to code outside of the LL/SC loop. As \ * such, we don't need to emit another barrier here. \ */ \ - if (!__SYNC_loongson3_war) \ + if (__SYNC_loongson3_war == 0) \ smp_mb__after_atomic(); \ \ return result; \ diff --git a/arch/mips/include/asm/cmpxchg.h b/arch/mips/include/asm/cmpxchg.h index 5b0b3a6777ea..ed8f3f3c4304 100644 --- a/arch/mips/include/asm/cmpxchg.h +++ b/arch/mips/include/asm/cmpxchg.h @@ -99,7 +99,7 @@ unsigned long __xchg(volatile void *ptr, unsigned long x, int size) * contains a completion barrier prior to the LL, so we don't \ * need to emit an extra one here. \ */ \ - if (!__SYNC_loongson3_war) \ + if (__SYNC_loongson3_war == 0) \ smp_mb__before_llsc(); \ \ __res = (__typeof__(*(ptr))) \ @@ -191,7 +191,7 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old, * contains a completion barrier prior to the LL, so we don't \ * need to emit an extra one here. \ */ \ - if (!__SYNC_loongson3_war) \ + if (__SYNC_loongson3_war == 0) \ smp_mb__before_llsc(); \ \ __res = cmpxchg_local((ptr), (old), (new)); \ @@ -201,7 +201,7 @@ unsigned long __cmpxchg(volatile void *ptr, unsigned long old, * contains a completion barrier after the SC, so we don't \ * need to emit an extra one here. \ */ \ - if (!__SYNC_loongson3_war) \ + if (__SYNC_loongson3_war == 0) \ smp_llsc_mb(); \ \ __res; \ diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index 6a77bc4a6eec..74082e35d57c 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -255,6 +255,12 @@ extern bool __virt_addr_valid(const volatile void *kaddr); #define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC +extern unsigned long __kaslr_offset; +static inline unsigned long kaslr_offset(void) +{ + return __kaslr_offset; +} + #include #include diff --git a/arch/mips/include/asm/traps.h b/arch/mips/include/asm/traps.h index 6a0864bb604d..9038b91e2d8c 100644 --- a/arch/mips/include/asm/traps.h +++ b/arch/mips/include/asm/traps.h @@ -24,6 +24,9 @@ extern void (*board_ebase_setup)(void); extern void (*board_cache_error_setup)(void); extern int register_nmi_notifier(struct notifier_block *nb); +extern void reserve_exception_space(phys_addr_t addr, unsigned long size); + +#define VECTORSPACING 0x100 /* for EI/VI mode */ #define nmi_notifier(fn, pri) \ ({ \ diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index e6853697a056..21794db53c05 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "fpu-probe.h" @@ -1619,6 +1620,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) c->cputype = CPU_BMIPS3300; __cpu_name[cpu] = "Broadcom BMIPS3300"; set_elf_platform(cpu, "bmips3300"); + reserve_exception_space(0x400, VECTORSPACING * 64); break; case PRID_IMP_BMIPS43XX: { int rev = c->processor_id & PRID_REV_MASK; @@ -1629,6 +1631,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS4380"; set_elf_platform(cpu, "bmips4380"); c->options |= MIPS_CPU_RIXI; + reserve_exception_space(0x400, VECTORSPACING * 64); } else { c->cputype = CPU_BMIPS4350; __cpu_name[cpu] = "Broadcom BMIPS4350"; @@ -1645,6 +1648,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "Broadcom BMIPS5000"; set_elf_platform(cpu, "bmips5000"); c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI; + reserve_exception_space(0x1000, VECTORSPACING * 64); break; } } @@ -1830,16 +1834,17 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) */ case PRID_COMP_INGENIC_D0: c->isa_level &= ~MIPS_CPU_ISA_M32R2; - break; + fallthrough; /* * The config0 register in the XBurst CPUs with a processor ID of - * PRID_COMP_INGENIC_D1 has an abandoned huge page tlb mode, this - * mode is not compatible with the MIPS standard, it will cause - * tlbmiss and into an infinite loop (line 21 in the tlb-funcs.S) - * when starting the init process. After chip reset, the default - * is HPTLB mode, Write 0xa9000000 to cp0 register 5 sel 4 to - * switch back to VTLB mode to prevent getting stuck. + * PRID_COMP_INGENIC_D0 or PRID_COMP_INGENIC_D1 has an abandoned + * huge page tlb mode, this mode is not compatible with the MIPS + * standard, it will cause tlbmiss and into an infinite loop + * (line 21 in the tlb-funcs.S) when starting the init process. + * After chip reset, the default is HPTLB mode, Write 0xa9000000 + * to cp0 register 5 sel 4 to switch back to VTLB mode to prevent + * getting stuck. */ case PRID_COMP_INGENIC_D1: write_c0_page_ctrl(XBURST_PAGECTRL_HPTLB_DIS); @@ -2123,6 +2128,8 @@ void cpu_probe(void) if (cpu == 0) __ua_limit = ~((1ull << cpu_vmbits) - 1); #endif + + reserve_exception_space(0, 0x1000); } void cpu_report(void) diff --git a/arch/mips/kernel/cpu-r3k-probe.c b/arch/mips/kernel/cpu-r3k-probe.c index abdbbe8c5a43..af654771918c 100644 --- a/arch/mips/kernel/cpu-r3k-probe.c +++ b/arch/mips/kernel/cpu-r3k-probe.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "fpu-probe.h" @@ -158,6 +159,8 @@ void cpu_probe(void) cpu_set_fpu_opts(c); else cpu_set_nofpu_opts(c); + + reserve_exception_space(0, 0x400); } void cpu_report(void) diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index 0e365b7c742d..ac16cf2716df 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -300,6 +300,13 @@ static inline int __init relocation_addr_valid(void *loc_new) return 1; } +static inline void __init update_kaslr_offset(unsigned long *addr, long offset) +{ + unsigned long *new_addr = (unsigned long *)RELOCATED(addr); + + *new_addr = (unsigned long)offset; +} + #if defined(CONFIG_USE_OF) void __weak *plat_get_fdt(void) { @@ -410,6 +417,9 @@ void *__init relocate_kernel(void) /* Return the new kernel's entry point */ kernel_entry = RELOCATED(start_kernel); + + /* Error may occur before, so keep it at last */ + update_kaslr_offset(&__kaslr_offset, offset); } out: return kernel_entry; diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 7e1f8e277437..83ec0d5a0918 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -84,6 +84,9 @@ static struct resource code_resource = { .name = "Kernel code", }; static struct resource data_resource = { .name = "Kernel data", }; static struct resource bss_resource = { .name = "Kernel bss", }; +unsigned long __kaslr_offset __ro_after_init; +EXPORT_SYMBOL(__kaslr_offset); + static void *detect_magic __initdata = detect_memory_region; #ifdef CONFIG_MIPS_AUTO_PFN_OFFSET diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e0352958e2f7..808b8b61ded1 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2009,13 +2009,16 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs) nmi_exit(); } -#define VECTORSPACING 0x100 /* for EI/VI mode */ - unsigned long ebase; EXPORT_SYMBOL_GPL(ebase); unsigned long exception_handlers[32]; unsigned long vi_handlers[64]; +void reserve_exception_space(phys_addr_t addr, unsigned long size) +{ + memblock_reserve(addr, size); +} + void __init *set_except_vector(int n, void *addr) { unsigned long handler = (unsigned long) addr; @@ -2367,10 +2370,7 @@ void __init trap_init(void) if (!cpu_has_mips_r2_r6) { ebase = CAC_BASE; - ebase_pa = virt_to_phys((void *)ebase); vec_size = 0x400; - - memblock_reserve(ebase_pa, vec_size); } else { if (cpu_has_veic || cpu_has_vint) vec_size = 0x200 + VECTORSPACING*64; diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 5e97e9d02f98..09fa4705ce8e 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS INIT_TASK_DATA(THREAD_SIZE) NOSAVE_DATA + PAGE_ALIGNED_DATA(PAGE_SIZE) CACHELINE_ALIGNED_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) READ_MOSTLY_DATA(1 << CONFIG_MIPS_L1_CACHE_SHIFT) DATA_DATA @@ -223,6 +224,5 @@ SECTIONS *(.options) *(.pdr) *(.reginfo) - *(.eh_frame) } } diff --git a/arch/mips/lantiq/irq.c b/arch/mips/lantiq/irq.c index df8eed3875f6..43c2f271e6ab 100644 --- a/arch/mips/lantiq/irq.c +++ b/arch/mips/lantiq/irq.c @@ -302,7 +302,7 @@ static void ltq_hw_irq_handler(struct irq_desc *desc) generic_handle_irq(irq_linear_revmap(ltq_domain, hwirq)); /* if this is a EBU irq, we need to ack it or get a deadlock */ - if ((irq == LTQ_ICU_EBU_IRQ) && (module == 0) && LTQ_EBU_PCC_ISTAT) + if (irq == LTQ_ICU_EBU_IRQ && !module && LTQ_EBU_PCC_ISTAT != 0) ltq_ebu_w32(ltq_ebu_r32(LTQ_EBU_PCC_ISTAT) | 0x10, LTQ_EBU_PCC_ISTAT); } diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index ec42c5085905..e2354e128d9a 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -5,28 +5,6 @@ cflags-$(CONFIG_CPU_LOONGSON64) += -Wa,--trap -# -# Some versions of binutils, not currently mainline as of 2019/02/04, support -# an -mfix-loongson3-llsc flag which emits a sync prior to each ll instruction -# to work around a CPU bug (see __SYNC_loongson3_war in asm/sync.h for a -# description). -# -# We disable this in order to prevent the assembler meddling with the -# instruction that labels refer to, ie. if we label an ll instruction: -# -# 1: ll v0, 0(a0) -# -# ...then with the assembler fix applied the label may actually point at a sync -# instruction inserted by the assembler, and if we were using the label in an -# exception table the table would no longer contain the address of the ll -# instruction. -# -# Avoid this by explicitly disabling that assembler behaviour. If upstream -# binutils does not merge support for the flag then we can revisit & remove -# this later - for now it ensures vendor toolchains don't cause problems. -# -cflags-$(CONFIG_CPU_LOONGSON64) += $(call as-option,-Wa$(comma)-mno-fix-loongson3-llsc,) - # # binutils from v2.25 on and gcc starting from v4.9.0 treat -march=loongson3a # as MIPS64 R2; older versions as just R1. This leaves the possibility open diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 4f976d687ab0..f67297b3175f 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1593,7 +1593,7 @@ static int probe_scache(void) return 1; } -static void __init loongson2_sc_init(void) +static void loongson2_sc_init(void) { struct cpuinfo_mips *c = ¤t_cpu_data; diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index 5810cc12bc1d..2131d3fd7333 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -16,16 +16,13 @@ ccflags-vdso := \ $(filter -march=%,$(KBUILD_CFLAGS)) \ $(filter -m%-float,$(KBUILD_CFLAGS)) \ $(filter -mno-loongson-%,$(KBUILD_CFLAGS)) \ + $(CLANG_FLAGS) \ -D__VDSO__ ifndef CONFIG_64BIT ccflags-vdso += -DBUILD_VDSO32 endif -ifdef CONFIG_CC_IS_CLANG -ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) -endif - # # The -fno-jump-tables flag only prevents the compiler from generating # jump tables but does not prevent the compiler from emitting absolute diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S index da8442450e46..0794cd7803df 100644 --- a/arch/nios2/kernel/entry.S +++ b/arch/nios2/kernel/entry.S @@ -389,7 +389,10 @@ ENTRY(ret_from_interrupt) */ ENTRY(sys_clone) SAVE_SWITCH_STACK + subi sp, sp, 4 /* make space for tls pointer */ + stw r8, 0(sp) /* pass tls pointer (r8) via stack (5th argument) */ call nios2_clone + addi sp, sp, 4 RESTORE_SWITCH_STACK ret diff --git a/arch/nios2/kernel/sys_nios2.c b/arch/nios2/kernel/sys_nios2.c index cd390ec4f88b..b1ca85699952 100644 --- a/arch/nios2/kernel/sys_nios2.c +++ b/arch/nios2/kernel/sys_nios2.c @@ -22,6 +22,7 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, unsigned int op) { struct vm_area_struct *vma; + struct mm_struct *mm = current->mm; if (len == 0) return 0; @@ -34,16 +35,22 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len, if (addr + len < addr) return -EFAULT; + if (mmap_read_lock_killable(mm)) + return -EINTR; + /* * Verify that the specified address region actually belongs * to this process. */ - vma = find_vma(current->mm, addr); - if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end) + vma = find_vma(mm, addr); + if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end) { + mmap_read_unlock(mm); return -EFAULT; + } flush_cache_range(vma, addr, addr + len); + mmap_read_unlock(mm); return 0; } diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 278462186ac4..22cf9da1e4a7 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -201,9 +201,12 @@ config PREFETCH def_bool y depends on PA8X00 || PA7200 +config PARISC_HUGE_KERNEL + def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST + config MLONGCALLS - def_bool y if !MODULES || UBSAN || FTRACE - bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE + def_bool y if PARISC_HUGE_KERNEL + bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL depends on PA8X00 help If you configure the kernel to include many drivers built-in instead diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index 49cd6d2caefb..1dfb439b0692 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -373,7 +373,11 @@ static inline int eirr_to_irq(unsigned long eirr) /* * IRQ STACK - used for irq handler */ +#ifdef CONFIG_64BIT +#define IRQ_STACK_SIZE (4096 << 4) /* 64k irq stack size */ +#else #define IRQ_STACK_SIZE (4096 << 3) /* 32k irq stack size */ +#endif union irq_stack_union { unsigned long stack[IRQ_STACK_SIZE/sizeof(unsigned long)]; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 107bb4319e0e..a685e42d3993 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -772,7 +772,7 @@ config PPC_64K_PAGES config PPC_256K_PAGES bool "256k page size" - depends on 44x && !STDBINUTILS + depends on 44x && !STDBINUTILS && !PPC_47x help Make the page size 256k. diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h index eacc9102c251..d5b3c3bb95b4 100644 --- a/arch/powerpc/include/asm/code-patching.h +++ b/arch/powerpc/include/asm/code-patching.h @@ -73,7 +73,7 @@ void __patch_exception(int exc, unsigned long addr); #endif #define OP_RT_RA_MASK 0xffff0000UL -#define LIS_R2 0x3c020000UL +#define LIS_R2 0x3c400000UL #define ADDIS_R2_R12 0x3c4c0000UL #define ADDI_R2_R2 0x38420000UL diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e0990..727d4b321937 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include #include -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } diff --git a/arch/powerpc/include/asm/dcr-native.h b/arch/powerpc/include/asm/dcr-native.h index 7141ccea8c94..a92059964579 100644 --- a/arch/powerpc/include/asm/dcr-native.h +++ b/arch/powerpc/include/asm/dcr-native.h @@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mfdcr(rn) \ ({unsigned int rval; \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mfdcr %0," __stringify(rn) \ - : "=r" (rval)); \ + asm volatile("mfdcr %0, %1" : "=r" (rval) \ + : "n" (rn)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ rval = mfdcrx(rn); \ else \ @@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val) #define mtdcr(rn, v) \ do { \ if (__builtin_constant_p(rn) && rn < 1024) \ - asm volatile("mtdcr " __stringify(rn) ",%0" \ - : : "r" (v)); \ + asm volatile("mtdcr %0, %1" \ + : : "n" (rn), "r" (v)); \ else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR))) \ mtdcrx(rn, v); \ else \ diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h index 55d6ede30c19..9ab344d29a54 100644 --- a/arch/powerpc/include/asm/kexec.h +++ b/arch/powerpc/include/asm/kexec.h @@ -136,6 +136,7 @@ int load_crashdump_segments_ppc64(struct kimage *image, int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, const void *fdt, unsigned long kernel_load_addr, unsigned long fdt_load_addr); +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image); int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, unsigned long initrd_load_addr, unsigned long initrd_len, const char *cmdline); diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index cf6ebbc16cb4..764f2732a821 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -59,6 +59,9 @@ struct machdep_calls { int (*pcibios_root_bridge_prepare)(struct pci_host_bridge *bridge); + /* finds all the pci_controllers present at boot */ + void (*discover_phbs)(void); + /* To setup PHBs when using automatic OF platform driver for PCI */ int (*pci_setup_phb)(struct pci_controller *host); diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index edc08f04aef7..5d1726bb28e7 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -10,6 +10,7 @@ #endif #ifdef CONFIG_PPC_SPLPAR +#include #include #include diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index 58f9dc060a7b..42e9bc4018da 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -70,6 +70,9 @@ struct pt_regs }; #endif + +#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)) + #ifdef __powerpc64__ /* @@ -192,7 +195,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x11 #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #endif #define CHECK_FULL_REGS(regs) BUG_ON(!FULL_REGS(regs)) #define NV_REG_POISON 0xdeadbeefdeadbeefUL @@ -207,7 +210,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc) #define TRAP_FLAGS_MASK 0x1F #define TRAP(regs) ((regs)->trap & ~TRAP_FLAGS_MASK) #define FULL_REGS(regs) (((regs)->trap & 1) == 0) -#define SET_FULL_REGS(regs) ((regs)->trap |= 1) +#define SET_FULL_REGS(regs) ((regs)->trap &= ~1) #define IS_CRITICAL_EXC(regs) (((regs)->trap & 2) != 0) #define IS_MCHECK_EXC(regs) (((regs)->trap & 4) != 0) #define IS_DEBUG_EXC(regs) (((regs)->trap & 8) != 0) diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h index fdab93428372..9d1fbd8be1c7 100644 --- a/arch/powerpc/include/asm/switch_to.h +++ b/arch/powerpc/include/asm/switch_to.h @@ -71,6 +71,16 @@ static inline void disable_kernel_vsx(void) { msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX); } +#else +static inline void enable_kernel_vsx(void) +{ + BUILD_BUG(); +} + +static inline void disable_kernel_vsx(void) +{ + BUILD_BUG(); +} #endif #ifdef CONFIG_SPE diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index 501c9a79038c..f53bfefb4a57 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -216,8 +216,6 @@ do { \ #define __put_user_nocheck_goto(x, ptr, size, label) \ do { \ __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ - if (!is_kernel_addr((unsigned long)__pu_addr)) \ - might_fault(); \ __chk_user_ptr(ptr); \ __put_user_size_goto((x), __pu_addr, (size), label); \ } while (0) @@ -313,7 +311,7 @@ do { \ __typeof__(size) __gu_size = (size); \ \ __chk_user_ptr(__gu_addr); \ - if (!is_kernel_addr((unsigned long)__gu_addr)) \ + if (do_allow && !is_kernel_addr((unsigned long)__gu_addr)) \ might_fault(); \ barrier_nospec(); \ if (do_allow) \ @@ -508,6 +506,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_write_user((void __user *)ptr, ptr, len); return true; } @@ -521,6 +522,9 @@ user_read_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_read_from_user(ptr, len); return true; } @@ -532,6 +536,9 @@ user_write_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; + + might_fault(); + allow_write_to_user((void __user *)ptr, len); return true; } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b12d7c049bfe..989006b5ad0f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -309,7 +309,7 @@ int main(void) /* Interrupt register frame */ DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE); - DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs)); + DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS); STACK_PT_REGS_OFFSET(GPR0, gpr[0]); STACK_PT_REGS_OFFSET(GPR1, gpr[1]); STACK_PT_REGS_OFFSET(GPR2, gpr[2]); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 1c9b0ccc2172..9bc4e7dd0bee 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -356,6 +356,9 @@ trace_syscall_entry_irq_off: .globl transfer_to_syscall transfer_to_syscall: +#ifdef CONFIG_PPC_BOOK3S_32 + kuep_lock r11, r12 +#endif #ifdef CONFIG_TRACE_IRQFLAGS andi. r12,r9,MSR_EE beq- trace_syscall_entry_irq_off diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6e53f7638737..de988770a7e4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -470,7 +470,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real) ld r10,PACAKMSR(r13) /* get MSR value for kernel */ /* MSR[RI] is clear iff using SRR regs */ - .if IHSRR == EXC_HV_OR_STD + .if IHSRR_IF_HVMODE BEGIN_FTR_SECTION xori r10,r10,MSR_RI END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h index a2f72c966baf..abc7b603ab65 100644 --- a/arch/powerpc/kernel/head_32.h +++ b/arch/powerpc/kernel/head_32.h @@ -47,7 +47,7 @@ lwz r1,TASK_STACK-THREAD(r1) addi r1, r1, THREAD_SIZE - INT_FRAME_SIZE 1: - mtcrf 0x7f, r1 + mtcrf 0x3f, r1 bt 32 - THREAD_ALIGN_SHIFT, stack_overflow #else subi r11, r1, INT_FRAME_SIZE /* use r1 if kernel */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52702f3db6df..9eb63cf6ac38 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -165,7 +165,7 @@ SystemCall: /* On the MPC8xx, this is a software emulation interrupt. It occurs * for all unimplemented and illegal instructions. */ - EXCEPTION(0x1000, SoftEmu, program_check_exception, EXC_XFER_STD) + EXCEPTION(0x1000, SoftEmu, emulation_assist_interrupt, EXC_XFER_STD) . = 0x1100 /* diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 858fbc8b19f3..b1a1a928fcb8 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -278,12 +278,6 @@ MachineCheck: 7: EXCEPTION_PROLOG_2 addi r3,r1,STACK_FRAME_OVERHEAD #ifdef CONFIG_PPC_CHRP -#ifdef CONFIG_VMAP_STACK - mfspr r4, SPRN_SPRG_THREAD - tovirt(r4, r4) - lwz r4, RTAS_SP(r4) - cmpwi cr1, r4, 0 -#endif beq cr1, machine_check_tramp twi 31, 0, 0 #else @@ -453,11 +447,12 @@ InstructionTLBMiss: cmplw 0,r1,r3 #endif mfspr r2, SPRN_SDR1 - li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 #ifdef CONFIG_MODULES bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ #endif 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ @@ -516,10 +511,11 @@ DataLoadTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ @@ -593,10 +589,11 @@ DataStoreTLBMiss: lis r1, TASK_SIZE@h /* check if kernel address */ cmplw 0,r1,r3 mfspr r2, SPRN_SDR1 - li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER rlwinm r2, r2, 28, 0xfffff000 bgt- 112f lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ 112: rlwimi r2,r3,12,20,29 /* insert top 10 bits of address */ lwz r2,0(r2) /* get pmd entry */ diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index cc7a6271b6b4..e8a548447dd6 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -269,6 +269,31 @@ void replay_soft_interrupts(void) } } +#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP) +static inline void replay_soft_interrupts_irqrestore(void) +{ + unsigned long kuap_state = get_kuap(); + + /* + * Check if anything calls local_irq_enable/restore() when KUAP is + * disabled (user access enabled). We handle that case here by saving + * and re-locking AMR but we shouldn't get here in the first place, + * hence the warning. + */ + kuap_check_amr(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(AMR_KUAP_BLOCKED); + + replay_soft_interrupts(); + + if (kuap_state != AMR_KUAP_BLOCKED) + set_kuap(kuap_state); +} +#else +#define replay_soft_interrupts_irqrestore() replay_soft_interrupts() +#endif + notrace void arch_local_irq_restore(unsigned long mask) { unsigned char irq_happened; @@ -332,7 +357,7 @@ notrace void arch_local_irq_restore(unsigned long mask) irq_soft_mask_set(IRQS_ALL_DISABLED); trace_hardirqs_off(); - replay_soft_interrupts(); + replay_soft_interrupts_irqrestore(); local_paca->irq_happened = 0; trace_hardirqs_on(); diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 2b555997b295..001e90cd8948 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1699,3 +1699,13 @@ static void fixup_hide_host_resource_fsl(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MOTOROLA, PCI_ANY_ID, fixup_hide_host_resource_fsl); DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_FREESCALE, PCI_ANY_ID, fixup_hide_host_resource_fsl); + + +static int __init discover_phbs(void) +{ + if (ppc_md.discover_phbs) + ppc_md.discover_phbs(); + + return 0; +} +core_initcall(discover_phbs); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a66f435dabbf..b65a73e4d642 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2176,7 +2176,7 @@ void show_stack(struct task_struct *tsk, unsigned long *stack, * See if this is an exception frame. * We look for the "regshere" marker in the current frame. */ - if (validate_sp(sp, tsk, STACK_INT_FRAME_SIZE) + if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS) && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) { struct pt_regs *regs = (struct pt_regs *) (sp + STACK_FRAME_OVERHEAD); diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index e9d4eb6144e1..ccf77b985c8f 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -1331,14 +1331,10 @@ static void __init prom_check_platform_support(void) if (prop_len > sizeof(vec)) prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n", prop_len); - prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", - &vec, sizeof(vec)); - for (i = 0; i < sizeof(vec); i += 2) { - prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 - , vec[i] - , vec[i + 1]); - prom_parse_platform_support(vec[i], vec[i + 1], - &supported); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2, vec[i], vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], &supported); } } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 67feb3524460..83633a24ce78 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include @@ -1030,6 +1031,7 @@ void __init time_init(void) tick_setup_hrtimer_broadcast(); of_clk_init(NULL); + enable_sched_clock_irqtime(); } /* diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3ec7b443fe6b..4be05517f2db 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -503,8 +503,11 @@ void system_reset_exception(struct pt_regs *regs) die("Unrecoverable nested System Reset", regs, SIGABRT); #endif /* Must die if the interrupt is not recoverable */ - if (!(regs->msr & MSR_RI)) + if (!(regs->msr & MSR_RI)) { + /* For the reason explained in die_mce, nmi_exit before die */ + nmi_exit(); die("Unrecoverable System Reset", regs, SIGABRT); + } if (saved_hsrrs) { mtspr(SPRN_HSRR0, hsrr0); diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a6e29f880e0e..d21d08140a5e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) + +/* Routines for restoring integer registers, called by the compiler. */ +/* Called with r11 pointing to the stack header word of the caller of the */ +/* function, just beyond the end of the integer restore area. */ +_GLOBAL(_restgpr_31_x) +_GLOBAL(_rest32gpr_31_x) + lwz r0,4(r11) + lwz r31,-4(r11) + mtlr r0 + mr r1,r11 + blr diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c index d0e459bb2f05..9842e33533df 100644 --- a/arch/powerpc/kexec/elf_64.c +++ b/arch/powerpc/kexec/elf_64.c @@ -102,7 +102,7 @@ static void *elf64_load(struct kimage *image, char *kernel_buf, pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); } - fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt_size = kexec_fdt_totalsize_ppc64(image); fdt = kmalloc(fdt_size, GFP_KERNEL); if (!fdt) { pr_err("Not enough memory for the device tree.\n"); diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index c69bcf9b547a..02b9e4d0dc40 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -925,6 +926,40 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, return ret; } +/** + * kexec_fdt_totalsize_ppc64 - Return the estimated size needed to setup FDT + * for kexec/kdump kernel. + * @image: kexec image being loaded. + * + * Returns the estimated size needed for kexec/kdump kernel FDT. + */ +unsigned int kexec_fdt_totalsize_ppc64(struct kimage *image) +{ + unsigned int fdt_size; + u64 usm_entries; + + /* + * The below estimate more than accounts for a typical kexec case where + * the additional space is to accommodate things like kexec cmdline, + * chosen node with properties for initrd start & end addresses and + * a property to indicate kexec boot.. + */ + fdt_size = fdt_totalsize(initial_boot_params) + (2 * COMMAND_LINE_SIZE); + if (image->type != KEXEC_TYPE_CRASH) + return fdt_size; + + /* + * For kdump kernel, also account for linux,usable-memory and + * linux,drconf-usable-memory properties. Get an approximate on the + * number of usable memory entries and use for FDT size estimation. + */ + usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) + + (2 * (resource_size(&crashk_res) / drmem_lmb_size()))); + fdt_size += (unsigned int)(usm_entries * sizeof(u64)); + + return fdt_size; +} + /** * setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel * being loaded. diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 549591d9aaa2..e45644657d49 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -54,6 +54,7 @@ config KVM_BOOK3S_32 select KVM select KVM_BOOK3S_32_HANDLER select KVM_BOOK3S_PR_POSSIBLE + select PPC_FPU help Support running unmodified book3s_32 guest kernels in virtual machines on book3s_32 host processors. diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cf52d26f49cd..25966ae3271e 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -1518,7 +1518,7 @@ int kvmppc_handle_vmx_load(struct kvm_vcpu *vcpu, return emulated; } -int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1536,7 +1536,7 @@ int kvmppc_get_vmx_dword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1554,7 +1554,7 @@ int kvmppc_get_vmx_word(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; @@ -1572,7 +1572,7 @@ int kvmppc_get_vmx_hword(struct kvm_vcpu *vcpu, int index, u64 *val) return result; } -int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) +static int kvmppc_get_vmx_byte(struct kvm_vcpu *vcpu, int index, u64 *val) { union kvmppc_one_reg reg; int vmx_offset = 0; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index ede093e96234..c6aebc149d14 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -904,7 +904,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op, if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); emulate_vsx_load(op, buf, mem, cross_endian); preempt_disable(); if (reg < 32) { @@ -951,7 +951,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, if (!address_ok(regs, ea, size)) return -EFAULT; - nr_vsx_regs = size / sizeof(__vector128); + nr_vsx_regs = max(1ul, size / sizeof(__vector128)); preempt_disable(); if (reg < 32) { /* FP regs + extensions */ @@ -1306,9 +1306,11 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, if ((word & 0xfe2) == 2) op->type = SYSCALL; else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && - (word & 0xfe3) == 1) + (word & 0xfe3) == 1) { /* scv */ op->type = SYSCALL_VECTORED_0; - else + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; + } else op->type = UNKNOWN; return 0; #endif @@ -1412,7 +1414,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 1: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -1445,8 +1447,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 4: + /* + * There are very many instructions with this primary opcode + * introduced in the ISA as early as v2.03. However, the ones + * we currently emulate were all introduced with ISA 3.0 + */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (word & 0x3f) { case 48: /* maddhd */ @@ -1472,7 +1479,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, * There are other instructions from ISA 3.0 with the same * primary opcode which do not have emulation support yet. */ - return -1; + goto unknown_opcode; #endif case 7: /* mulli */ @@ -1532,6 +1539,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 19: if (((word >> 1) & 0x1f) == 2) { /* addpcis */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; imm = (short) (word & 0xffc1); /* d0 + d2 fields */ imm |= (word >> 15) & 0x3e; /* d1 field */ op->val = regs->nip + (imm << 16) + 4; @@ -1844,7 +1853,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef __powerpc64__ case 265: /* modud */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = regs->gpr[ra] % regs->gpr[rb]; goto compute_done; #endif @@ -1854,7 +1863,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 267: /* moduw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (unsigned int) regs->gpr[ra] % (unsigned int) regs->gpr[rb]; goto compute_done; @@ -1891,7 +1900,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 755: /* darn */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; switch (ra & 0x3) { case 0: /* 32-bit conditioned */ @@ -1909,18 +1918,18 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, goto compute_done; } - return -1; + goto unknown_opcode; #ifdef __powerpc64__ case 777: /* modsd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (long int) regs->gpr[ra] % (long int) regs->gpr[rb]; goto compute_done; #endif case 779: /* modsw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->val = (int) regs->gpr[ra] % (int) regs->gpr[rb]; goto compute_done; @@ -1997,14 +2006,14 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #endif case 538: /* cnttzw */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = (unsigned int) regs->gpr[rd]; op->val = (val ? __builtin_ctz(val) : 32); goto logical_done; #ifdef __powerpc64__ case 570: /* cnttzd */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; val = regs->gpr[rd]; op->val = (val ? __builtin_ctzl(val) : 64); goto logical_done; @@ -2114,7 +2123,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 890: /* extswsli with sh_5 = 0 */ case 891: /* extswsli with sh_5 = 1 */ if (!cpu_has_feature(CPU_FTR_ARCH_300)) - return -1; + goto unknown_opcode; op->type = COMPUTE + SETREG; sh = rb | ((word & 2) << 4); val = (signed int) regs->gpr[rd]; @@ -2441,6 +2450,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 268: /* lxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 16; @@ -2450,6 +2461,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 269: /* lxvl */ case 301: { /* lxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2470,13 +2483,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 333: /* lxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(LOAD_VSX, 0, 32); op->element_size = 32; break; case 364: /* lxvwsx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 4; @@ -2484,6 +2499,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 396: /* stxvx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 16; @@ -2493,6 +2510,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 397: /* stxvl */ case 429: { /* stxvll */ int nb; + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->ea = ra ? regs->gpr[ra] : 0; nb = regs->gpr[rb] & 0xff; @@ -2506,7 +2525,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } case 461: /* stxvpx */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->reg = VSX_REGISTER_XTP(rd); op->type = MKOP(STORE_VSX, 0, 32); op->element_size = 32; @@ -2544,6 +2563,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 781: /* lxsibzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 1); op->element_size = 8; @@ -2551,6 +2572,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 812: /* lxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 2; @@ -2558,6 +2581,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 813: /* lxsihzx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 2); op->element_size = 8; @@ -2571,6 +2596,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 876: /* lxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(LOAD_VSX, 0, 16); op->element_size = 1; @@ -2584,6 +2611,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 909: /* stxsibx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 1); op->element_size = 8; @@ -2591,6 +2620,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 940: /* stxvh8x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 2; @@ -2598,6 +2629,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 941: /* stxsihx */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 2); op->element_size = 8; @@ -2611,6 +2644,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1004: /* stxvb16x */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd | ((word & 1) << 5); op->type = MKOP(STORE_VSX, 0, 16); op->element_size = 1; @@ -2719,12 +2754,16 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, op->type = MKOP(LOAD_FP, 0, 16); break; case 2: /* lxsd */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 8); op->element_size = 8; op->vsx_flags = VSX_CHECK_VEC; break; case 3: /* lxssp */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->reg = rd + 32; op->type = MKOP(LOAD_VSX, 0, 4); op->element_size = 8; @@ -2754,7 +2793,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, #ifdef CONFIG_VSX case 6: if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; op->ea = dqform_ea(word, regs); op->reg = VSX_REGISTER_XTP(rd); op->element_size = 32; @@ -2777,6 +2816,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* lxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2787,6 +2828,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 2: /* stxsd with LSB of DS field = 0 */ case 6: /* stxsd with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 8); @@ -2796,6 +2839,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, case 3: /* stxssp with LSB of DS field = 0 */ case 7: /* stxssp with LSB of DS field = 1 */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dsform_ea(word, regs); op->reg = rd + 32; op->type = MKOP(STORE_VSX, 0, 4); @@ -2804,6 +2849,8 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 5: /* stxv */ + if (!cpu_has_feature(CPU_FTR_ARCH_300)) + goto unknown_opcode; op->ea = dqform_ea(word, regs); if (word & 8) op->reg = rd + 32; @@ -2833,7 +2880,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, break; case 1: /* Prefixed instructions */ if (!cpu_has_feature(CPU_FTR_ARCH_31)) - return -1; + goto unknown_opcode; prefix_r = GET_PREFIX_R(word); ra = GET_PREFIX_RA(suffix); @@ -2972,6 +3019,20 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, } + if (OP_IS_LOAD_STORE(op->type) && (op->type & UPDATE)) { + switch (GETTYPE(op->type)) { + case LOAD: + if (ra == rd) + goto unknown_opcode; + fallthrough; + case STORE: + case LOAD_FP: + case STORE_FP: + if (ra == 0) + goto unknown_opcode; + } + } + #ifdef CONFIG_VSX if ((GETTYPE(op->type) == LOAD_VSX || GETTYPE(op->type) == STORE_VSX) && @@ -2982,6 +3043,10 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; + unknown_opcode: + op->type = UNKNOWN; + return 0; + logical_done: if (word & 1) set_cr0(regs, op); diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 28206b1fe172..51f413521fde 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -212,7 +212,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs * if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); - if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0) + if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel) *addrp = 0; } @@ -506,7 +506,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events * * addresses, hence include a check before filtering code */ if (!(ppmu->flags & PPMU_ARCH_31) && - is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0) + is_kernel_addr(addr) && event->attr.exclude_kernel) continue; /* Branches are read most recent first (ie. mfbhrb 0 is @@ -2149,7 +2149,17 @@ static void record_and_restart(struct perf_event *event, unsigned long val, left += period; if (left <= 0) left = period; - record = siar_valid(regs); + + /* + * If address is not requested in the sample via + * PERF_SAMPLE_IP, just record that sample irrespective + * of SIAR valid check. + */ + if (event->attr.sample_type & PERF_SAMPLE_IP) + record = siar_valid(regs); + else + record = 1; + event->hw.last_period = event->hw.sample_period; } if (left < 0x80000000LL) @@ -2167,9 +2177,10 @@ static void record_and_restart(struct perf_event *event, unsigned long val, * MMCR2. Check attr.exclude_kernel and address to drop the sample in * these cases. */ - if (event->attr.exclude_kernel && record) - if (is_kernel_addr(mfspr(SPRN_SIAR))) - record = 0; + if (event->attr.exclude_kernel && + (event->attr.sample_type & PERF_SAMPLE_IP) && + is_kernel_addr(mfspr(SPRN_SIAR))) + record = 0; /* * Finally record data if requested. diff --git a/arch/powerpc/platforms/pseries/dlpar.c b/arch/powerpc/platforms/pseries/dlpar.c index 16e86ba8aa20..f6b7749d6ada 100644 --- a/arch/powerpc/platforms/pseries/dlpar.c +++ b/arch/powerpc/platforms/pseries/dlpar.c @@ -127,7 +127,6 @@ void dlpar_free_cc_nodes(struct device_node *dn) #define NEXT_PROPERTY 3 #define PREV_PARENT 4 #define MORE_MEMORY 5 -#define CALL_AGAIN -2 #define ERR_CFG_USE -9003 struct device_node *dlpar_configure_connector(__be32 drc_index, @@ -168,6 +167,9 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, spin_unlock(&rtas_data_buf_lock); + if (rtas_busy_delay(rc)) + continue; + switch (rc) { case COMPLETE: break; @@ -216,9 +218,6 @@ struct device_node *dlpar_configure_connector(__be32 drc_index, last_dn = last_dn->parent; break; - case CALL_AGAIN: - break; - case MORE_MEMORY: case ERR_CFG_USE: default: diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index b3ac2455faad..637300330507 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -4,6 +4,7 @@ * Copyright 2006-2007 Michael Ellerman, IBM Corp. */ +#include #include #include #include @@ -458,8 +459,28 @@ static int rtas_setup_msi_irqs(struct pci_dev *pdev, int nvec_in, int type) return hwirq; } - virq = irq_create_mapping_affinity(NULL, hwirq, - entry->affinity); + /* + * Depending on the number of online CPUs in the original + * kernel, it is likely for CPU #0 to be offline in a kdump + * kernel. The associated IRQs in the affinity mappings + * provided by irq_create_affinity_masks() are thus not + * started by irq_startup(), as per-design for managed IRQs. + * This can be a problem with multi-queue block devices driven + * by blk-mq : such a non-started IRQ is very likely paired + * with the single queue enforced by blk-mq during kdump (see + * blk_mq_alloc_tag_set()). This causes the device to remain + * silent and likely hangs the guest at some point. + * + * We don't really care for fine-grained affinity when doing + * kdump actually : simply ignore the pre-computed affinity + * masks in this case and let the default mask with all CPUs + * be used when creating the IRQ mappings. + */ + if (is_kdump_kernel()) + virq = irq_create_mapping(NULL, hwirq); + else + virq = irq_create_mapping_affinity(NULL, hwirq, + entry->affinity); if (!virq) { pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e0a34eb5ed3b..e6d569ae817d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -87,7 +87,6 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI - select SPARSEMEM_STATIC if 32BIT select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK @@ -148,7 +147,8 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y depends on MMU - select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_STATIC if 32BIT && SPARSMEM + select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 653edb25d495..c0fdb05ffa0b 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -51,10 +51,10 @@ enum sbi_ext_rfence_fid { SBI_EXT_RFENCE_REMOTE_FENCE_I = 0, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, - SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, + SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, }; enum sbi_ext_hsm_fid { diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c7c0655dd45b..968202561d47 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -147,7 +147,8 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res); mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); if (!mem_res) panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 0cfd6da784f8..71a315e73cbe 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -32,9 +32,10 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH) # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os -# Disable gcov profiling for VDSO code +# Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n KCOV_INSTRUMENT := n +KASAN_SANITIZE := n # Force dependency $(obj)/vdso.o: $(obj)/vdso.so diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index f9f9568d689e..f81f813b9603 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -226,8 +226,6 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; -#define MAX_EARLY_MAPPING_SIZE SZ_128M - pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) @@ -302,13 +300,7 @@ static void __init create_pte_mapping(pte_t *ptep, pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss; pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; - -#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE -#define NUM_EARLY_PMDS 1UL -#else -#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE) -#endif -pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE); +pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) @@ -330,11 +322,9 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - uintptr_t pmd_num; + BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT); - pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT; - BUG_ON(pmd_num >= NUM_EARLY_PMDS); - return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD]; + return (uintptr_t)early_pmd; } static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va) @@ -452,7 +442,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) uintptr_t va, pa, end_va; uintptr_t load_pa = (uintptr_t)(&_start); uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; - uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE); + uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif @@ -464,12 +454,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * Enforce boot alignment requirements of RV32 and * RV64 by only allowing PMD or PGD mappings. */ - BUG_ON(map_size == PAGE_SIZE); + map_size = PMD_SIZE; /* Sanity check alignment and size */ BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0); BUG_ON((load_pa % map_size) != 0); - BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE); pt_ops.alloc_pte = alloc_pte_early; pt_ops.get_pte_virt = get_pte_virt_early; diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 212628932ddc..a75d94a9bcb2 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -201,8 +201,8 @@ extern unsigned int s390_pci_no_rid; Prototypes ----------------------------------------------------------------------------- */ /* Base stuff */ -int zpci_create_device(struct zpci_dev *); -void zpci_remove_device(struct zpci_dev *zdev); +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); +void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); @@ -212,7 +212,7 @@ void zpci_remove_reserved_devices(void); /* CLP */ int clp_setup_writeback_mio(void); int clp_scan_pci_devices(void); -int clp_add_pci_device(u32, u32, int); +int clp_query_pci_fn(struct zpci_dev *zdev); int clp_enable_fh(struct zpci_dev *, u8); int clp_disable_fh(struct zpci_dev *); int clp_get_state(u32 fid, enum zpci_state *state); diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 27c763014114..1bae4a65416b 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -770,7 +770,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) { struct sclp_core_entry *core; - cpumask_t avail; + static cpumask_t avail; bool configured; u16 core_id; int nr, i; diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 5aaa2ca6a928..9b3c5978b668 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -136,7 +136,8 @@ static int do_account_vtime(struct task_struct *tsk) " stck %1" /* Store current tod clock value */ #endif : "=Q" (S390_lowcore.last_update_timer), - "=Q" (S390_lowcore.last_update_clock)); + "=Q" (S390_lowcore.last_update_clock) + : : "cc"); clock = S390_lowcore.last_update_clock - clock; timer -= S390_lowcore.last_update_timer; @@ -216,7 +217,7 @@ void vtime_flush(struct task_struct *tsk) avg_steal = S390_lowcore.avg_steal_timer / 2; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } S390_lowcore.avg_steal_timer = avg_steal; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 41df8fcfddde..91064077526d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -682,56 +682,101 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); -void zpci_remove_device(struct zpci_dev *zdev) +/* zpci_remove_device - Removes the given zdev from the PCI core + * @zdev: the zdev to be removed from the PCI core + * @set_error: if true the device's error state is set to permanent failure + * + * Sets a zPCI device to a configured but offline state; the zPCI + * device is still accessible through its hotplug slot and the zPCI + * API but is removed from the common code PCI bus, making it + * no longer available to drivers. + */ +void zpci_remove_device(struct zpci_dev *zdev, bool set_error) { struct zpci_bus *zbus = zdev->zbus; struct pci_dev *pdev; + if (!zdev->zbus->bus) + return; + pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { - if (pdev->is_virtfn) - return zpci_iov_remove_virtfn(pdev, zdev->vfn); + if (set_error) + pdev->error_state = pci_channel_io_perm_failure; + if (pdev->is_virtfn) { + zpci_iov_remove_virtfn(pdev, zdev->vfn); + /* balance pci_get_slot */ + pci_dev_put(pdev); + return; + } pci_stop_and_remove_bus_device_locked(pdev); + /* balance pci_get_slot */ + pci_dev_put(pdev); } } -int zpci_create_device(struct zpci_dev *zdev) +/** + * zpci_create_device() - Create a new zpci_dev and add it to the zbus + * @fid: Function ID of the device to be created + * @fh: Current Function Handle of the device to be created + * @state: Initial state after creation either Standby or Configured + * + * Creates a new zpci device and adds it to its, possibly newly created, zbus + * as well as zpci_list. + * + * Returns: 0 on success, an error value otherwise + */ +int zpci_create_device(u32 fid, u32 fh, enum zpci_state state) { + struct zpci_dev *zdev; int rc; - kref_init(&zdev->kref); + zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, state); + zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); + if (!zdev) + return -ENOMEM; - spin_lock(&zpci_list_lock); - list_add_tail(&zdev->entry, &zpci_list); - spin_unlock(&zpci_list_lock); + /* FID and Function Handle are the static/dynamic identifiers */ + zdev->fid = fid; + zdev->fh = fh; - rc = zpci_init_iommu(zdev); + /* Query function properties and update zdev */ + rc = clp_query_pci_fn(zdev); if (rc) - goto out; + goto error; + zdev->state = state; + kref_init(&zdev->kref); mutex_init(&zdev->lock); + + rc = zpci_init_iommu(zdev); + if (rc) + goto error; + if (zdev->state == ZPCI_FN_STATE_CONFIGURED) { rc = zpci_enable_device(zdev); if (rc) - goto out_destroy_iommu; + goto error_destroy_iommu; } rc = zpci_bus_device_register(zdev, &pci_root_ops); if (rc) - goto out_disable; + goto error_disable; + + spin_lock(&zpci_list_lock); + list_add_tail(&zdev->entry, &zpci_list); + spin_unlock(&zpci_list_lock); return 0; -out_disable: +error_disable: if (zdev->state == ZPCI_FN_STATE_ONLINE) zpci_disable_device(zdev); - -out_destroy_iommu: +error_destroy_iommu: zpci_destroy_iommu(zdev); -out: - spin_lock(&zpci_list_lock); - list_del(&zdev->entry); - spin_unlock(&zpci_list_lock); +error: + zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); + kfree(zdev); return rc; } @@ -740,7 +785,7 @@ void zpci_release_device(struct kref *kref) struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); if (zdev->zbus->bus) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 153720d21ae7..d3331596ddbe 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -181,7 +181,7 @@ static int clp_store_query_pci_fn(struct zpci_dev *zdev, return 0; } -static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) +int clp_query_pci_fn(struct zpci_dev *zdev) { struct clp_req_rsp_query_pci *rrb; int rc; @@ -194,7 +194,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->request.hdr.len = sizeof(rrb->request); rrb->request.hdr.cmd = CLP_QUERY_PCI_FN; rrb->response.hdr.len = sizeof(rrb->response); - rrb->request.fh = fh; + rrb->request.fh = zdev->fh; rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { @@ -212,40 +212,6 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) return rc; } -int clp_add_pci_device(u32 fid, u32 fh, int configured) -{ - struct zpci_dev *zdev; - int rc = -ENOMEM; - - zpci_dbg(3, "add fid:%x, fh:%x, c:%d\n", fid, fh, configured); - zdev = kzalloc(sizeof(*zdev), GFP_KERNEL); - if (!zdev) - goto error; - - zdev->fh = fh; - zdev->fid = fid; - - /* Query function properties and update zdev */ - rc = clp_query_pci_fn(zdev, fh); - if (rc) - goto error; - - if (configured) - zdev->state = ZPCI_FN_STATE_CONFIGURED; - else - zdev->state = ZPCI_FN_STATE_STANDBY; - - rc = zpci_create_device(zdev); - if (rc) - goto error; - return 0; - -error: - zpci_dbg(0, "add fid:%x, rc:%d\n", fid, rc); - kfree(zdev); - return rc; -} - static int clp_refresh_fh(u32 fid); /* * Enable/Disable a given PCI function and update its function handle if @@ -408,7 +374,7 @@ static void __clp_add(struct clp_fh_list_entry *entry, void *data) zdev = get_zdev_by_fid(entry->fid); if (!zdev) - clp_add_pci_device(entry->fid, entry->fh, entry->config_state); + zpci_create_device(entry->fid, entry->fh, entry->config_state); } int clp_scan_pci_devices(void) diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index 9a6bae503fe6..ac0c65cdd69d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,20 +76,17 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = NULL; enum zpci_state state; + struct pci_dev *pdev; int ret; - if (zdev && zdev->zbus && zdev->zbus->bus) - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); switch (ccdf->pec) { case 0x0301: /* Reserved|Standby -> Configured */ if (!zdev) { - ret = clp_add_pci_device(ccdf->fid, ccdf->fh, 1); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED); break; } /* the configuration request may be stale */ @@ -116,7 +113,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) break; case 0x0302: /* Reserved -> Standby */ if (!zdev) { - clp_add_pci_device(ccdf->fid, ccdf->fh, 0); + zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY); break; } zdev->fh = ccdf->fh; @@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0303: /* Deconfiguration requested */ if (!zdev) break; - if (pdev) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); ret = zpci_disable_device(zdev); if (ret) @@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0304: /* Configured -> Standby|Reserved */ if (!zdev) break; - if (pdev) { - /* Give the driver a hint that the function is - * already unusable. */ - pdev->error_state = pci_channel_io_perm_failure; - zpci_remove_device(zdev); - } + /* Give the driver a hint that the function is + * already unusable. + */ + zpci_remove_device(zdev, true); zdev->fh = ccdf->fh; zpci_disable_device(zdev); diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 18f2d10c3176..474617b88648 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -170,7 +170,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); if (ret) goto out_unlock_mmap; @@ -311,7 +311,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, if (!(vma->vm_flags & VM_WRITE)) goto out_unlock_mmap; - ret = follow_pte(vma->vm_mm, mmio_addr, NULL, &ptep, NULL, &ptl); + ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); if (ret) goto out_unlock_mmap; diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index c9c34dc52b7d..639dde28124a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -494,7 +494,7 @@ config COMPAT bool depends on SPARC64 default y - select COMPAT_BINFMT_ELF + select COMPAT_BINFMT_ELF if BINFMT_ELF select HAVE_UID16 select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION diff --git a/arch/sparc/include/asm/mman.h b/arch/sparc/include/asm/mman.h index f94532f25db1..274217e7ed70 100644 --- a/arch/sparc/include/asm/mman.h +++ b/arch/sparc/include/asm/mman.h @@ -57,35 +57,39 @@ static inline int sparc_validate_prot(unsigned long prot, unsigned long addr) { if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_ADI)) return 0; - if (prot & PROT_ADI) { - if (!adi_capable()) - return 0; + return 1; +} - if (addr) { - struct vm_area_struct *vma; +#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags) +/* arch_validate_flags() - Ensure combination of flags is valid for a + * VMA. + */ +static inline bool arch_validate_flags(unsigned long vm_flags) +{ + /* If ADI is being enabled on this VMA, check for ADI + * capability on the platform and ensure VMA is suitable + * for ADI + */ + if (vm_flags & VM_SPARC_ADI) { + if (!adi_capable()) + return false; - vma = find_vma(current->mm, addr); - if (vma) { - /* ADI can not be enabled on PFN - * mapped pages - */ - if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) - return 0; + /* ADI can not be enabled on PFN mapped pages */ + if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP)) + return false; - /* Mergeable pages can become unmergeable - * if ADI is enabled on them even if they - * have identical data on them. This can be - * because ADI enabled pages with identical - * data may still not have identical ADI - * tags on them. Disallow ADI on mergeable - * pages. - */ - if (vma->vm_flags & VM_MERGEABLE) - return 0; - } - } + /* Mergeable pages can become unmergeable + * if ADI is enabled on them even if they + * have identical data on them. This can be + * because ADI enabled pages with identical + * data may still not have identical ADI + * tags on them. Disallow ADI on mergeable + * pages. + */ + if (vm_flags & VM_MERGEABLE) + return false; } - return 1; + return true; } #endif /* CONFIG_SPARC64 */ diff --git a/arch/sparc/kernel/led.c b/arch/sparc/kernel/led.c index bd48575172c3..3a66e62eb2a0 100644 --- a/arch/sparc/kernel/led.c +++ b/arch/sparc/kernel/led.c @@ -50,6 +50,7 @@ static void led_blink(struct timer_list *unused) add_timer(&led_blink_timer); } +#ifdef CONFIG_PROC_FS static int led_proc_show(struct seq_file *m, void *v) { if (get_auxio() & AUXIO_LED) @@ -111,6 +112,7 @@ static const struct proc_ops led_proc_ops = { .proc_release = single_release, .proc_write = led_proc_write, }; +#endif static struct proc_dir_entry *led; diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index d92e5eaa4c1d..a850dccd78ea 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs) asi = (regs->tstate >> 24); /* saved %asi */ else asi = (insn >> 5); /* immediate asi */ - if ((asi & 0xf2) == ASI_PNF) { - if (insn & 0x1000000) { /* op3[5:4]=3 */ - handle_ldf_stq(insn, regs); - return true; - } else if (insn & 0x200000) { /* op3[2], stores */ + if ((asi & 0xf6) == ASI_PNF) { + if (insn & 0x200000) /* op3[2], stores */ return false; - } - handle_ld_nf(insn, regs); + if (insn & 0x1000000) /* op3[5:4]=3 (fp) */ + handle_ldf_stq(insn, regs); + else + handle_ld_nf(insn, regs); return true; } } diff --git a/arch/sparc/lib/memset.S b/arch/sparc/lib/memset.S index b89d42b29e34..f427f34b8b79 100644 --- a/arch/sparc/lib/memset.S +++ b/arch/sparc/lib/memset.S @@ -142,6 +142,7 @@ __bzero: ZERO_LAST_BLOCKS(%o0, 0x48, %g2) ZERO_LAST_BLOCKS(%o0, 0x08, %g2) 13: + EXT(12b, 13b, 21f) be 8f andcc %o1, 4, %g0 diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c index eb2946b1df8a..6139c5700ccc 100644 --- a/arch/sparc/mm/init_32.c +++ b/arch/sparc/mm/init_32.c @@ -197,6 +197,9 @@ unsigned long __init bootmem_init(unsigned long *pages_avail) size = memblock_phys_mem_size() - memblock_reserved_size(); *pages_avail = (size >> PAGE_SHIFT) - high_pages; + /* Only allow low memory to be allocated via memblock allocation */ + memblock_set_current_limit(max_low_pfn << PAGE_SHIFT); + return max_pfn; } diff --git a/arch/um/include/shared/skas/mm_id.h b/arch/um/include/shared/skas/mm_id.h index 4337b4ced095..e82e203f5f41 100644 --- a/arch/um/include/shared/skas/mm_id.h +++ b/arch/um/include/shared/skas/mm_id.h @@ -12,6 +12,7 @@ struct mm_id { int pid; } u; unsigned long stack; + int kill; }; #endif diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 61776790cd67..5be1b0da9f3b 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -125,6 +125,9 @@ static int add_mmap(unsigned long virt, unsigned long phys, unsigned long len, struct host_vm_op *last; int fd = -1, ret = 0; + if (virt + len > STUB_START && virt < STUB_END) + return -EINVAL; + if (hvc->userspace) fd = phys_mapping(phys, &offset); else @@ -162,7 +165,7 @@ static int add_munmap(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; - if ((addr >= STUB_START) && (addr < STUB_END)) + if (addr + len > STUB_START && addr < STUB_END) return -EINVAL; if (hvc->index != 0) { @@ -192,6 +195,9 @@ static int add_mprotect(unsigned long addr, unsigned long len, struct host_vm_op *last; int ret = 0; + if (addr + len > STUB_START && addr < STUB_END) + return -EINVAL; + if (hvc->index != 0) { last = &hvc->ops[hvc->index - 1]; if ((last->type == MPROTECT) && @@ -346,12 +352,11 @@ void fix_range_common(struct mm_struct *mm, unsigned long start_addr, /* This is not an else because ret is modified above */ if (ret) { + struct mm_id *mm_idp = ¤t->mm->context.id; + printk(KERN_ERR "fix_range_common: failed, killing current " "process: %d\n", task_tgid_vnr(current)); - /* We are under mmap_lock, release it such that current can terminate */ - mmap_write_unlock(current->mm); - force_sig(SIGKILL); - do_signal(¤t->thread.regs); + mm_idp->kill = 1; } } @@ -472,6 +477,10 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long address) struct mm_id *mm_id; address &= PAGE_MASK; + + if (address >= STUB_START && address < STUB_END) + goto kill; + pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto kill; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index 0621d521208e..02c4741ade5e 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -249,6 +249,7 @@ static int userspace_tramp(void *stack) } int userspace_pid[NR_CPUS]; +int kill_userspace_mm[NR_CPUS]; /** * start_userspace() - prepare a new userspace process @@ -342,6 +343,8 @@ void userspace(struct uml_pt_regs *regs, unsigned long *aux_fp_regs) interrupt_end(); while (1) { + if (kill_userspace_mm[0]) + fatal_sigsegv(); /* * This can legitimately fail if the process loads a @@ -663,4 +666,5 @@ void reboot_skas(void) void __switch_mm(struct mm_id *mm_idp) { userspace_pid[0] = mm_idp->u.pid; + kill_userspace_mm[0] = mm_idp->kill; } diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 814fe0d349b0..a79ff6379479 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -157,7 +157,7 @@ config MPENTIUM4 config MK6 - bool "K6/K6-II/K6-III" + bool "AMD K6/K6-II/K6-III" depends on X86_32 help Select this for an AMD K6-family processor. Enables use of @@ -165,7 +165,7 @@ config MK6 flags to GCC. config MK7 - bool "Athlon/Duron/K7" + bool "AMD Athlon/Duron/K7" depends on X86_32 help Select this for an AMD Athlon K7-family processor. Enables use of @@ -173,12 +173,98 @@ config MK7 flags to GCC. config MK8 - bool "Opteron/Athlon64/Hammer/K8" + bool "AMD Opteron/Athlon64/Hammer/K8" help Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables use of some extended instructions, and passes appropriate optimization flags to GCC. +config MK8SSE3 + bool "AMD Opteron/Athlon64/Hammer/K8 with SSE3" + help + Select this for improved AMD Opteron or Athlon64 Hammer-family processors. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MK10 + bool "AMD 61xx/7x50/PhenomX3/X4/II/K10" + help + Select this for an AMD 61xx Eight-Core Magny-Cours, Athlon X2 7x50, + Phenom X3/X4/II, Athlon II X2/X3/X4, or Turion II-family processor. + Enables use of some extended instructions, and passes appropriate + optimization flags to GCC. + +config MBARCELONA + bool "AMD Barcelona" + help + Select this for AMD Family 10h Barcelona processors. + + Enables -march=barcelona + +config MBOBCAT + bool "AMD Bobcat" + help + Select this for AMD Family 14h Bobcat processors. + + Enables -march=btver1 + +config MJAGUAR + bool "AMD Jaguar" + help + Select this for AMD Family 16h Jaguar processors. + + Enables -march=btver2 + +config MBULLDOZER + bool "AMD Bulldozer" + help + Select this for AMD Family 15h Bulldozer processors. + + Enables -march=bdver1 + +config MPILEDRIVER + bool "AMD Piledriver" + help + Select this for AMD Family 15h Piledriver processors. + + Enables -march=bdver2 + +config MSTEAMROLLER + bool "AMD Steamroller" + help + Select this for AMD Family 15h Steamroller processors. + + Enables -march=bdver3 + +config MEXCAVATOR + bool "AMD Excavator" + help + Select this for AMD Family 15h Excavator processors. + + Enables -march=bdver4 + +config MZEN + bool "AMD Zen" + help + Select this for AMD Family 17h Zen processors. + + Enables -march=znver1 + +config MZEN2 + bool "AMD Zen 2" + help + Select this for AMD Family 17h Zen 2 processors. + + Enables -march=znver2 + +config MZEN3 + bool "AMD Zen 3" + depends on GCC_VERSION > 110000 + help + Select this for AMD Family 19h Zen 3 processors. + + Enables -march=znver3 + config MCRUSOE bool "Crusoe" depends on X86_32 @@ -270,7 +356,7 @@ config MPSC in /proc/cpuinfo. Family 15 is an older Xeon, Family 6 a newer one. config MCORE2 - bool "Core 2/newer Xeon" + bool "Intel Core 2" help Select this for Intel Core 2 and newer Core 2 Xeons (Xeon 51xx and @@ -278,6 +364,8 @@ config MCORE2 family in /proc/cpuinfo. Newer ones have 6 and older ones 15 (not a typo) + Enables -march=core2 + config MATOM bool "Intel Atom" help @@ -287,6 +375,172 @@ config MATOM accordingly optimized code. Use a recent GCC with specific Atom support in order to fully benefit from selecting this option. +config MNEHALEM + bool "Intel Nehalem" + select X86_P6_NOP + help + + Select this for 1st Gen Core processors in the Nehalem family. + + Enables -march=nehalem + +config MWESTMERE + bool "Intel Westmere" + select X86_P6_NOP + help + + Select this for the Intel Westmere formerly Nehalem-C family. + + Enables -march=westmere + +config MSILVERMONT + bool "Intel Silvermont" + select X86_P6_NOP + help + + Select this for the Intel Silvermont platform. + + Enables -march=silvermont + +config MGOLDMONT + bool "Intel Goldmont" + select X86_P6_NOP + help + + Select this for the Intel Goldmont platform including Apollo Lake and Denverton. + + Enables -march=goldmont + +config MGOLDMONTPLUS + bool "Intel Goldmont Plus" + select X86_P6_NOP + help + + Select this for the Intel Goldmont Plus platform including Gemini Lake. + + Enables -march=goldmont-plus + +config MSANDYBRIDGE + bool "Intel Sandy Bridge" + select X86_P6_NOP + help + + Select this for 2nd Gen Core processors in the Sandy Bridge family. + + Enables -march=sandybridge + +config MIVYBRIDGE + bool "Intel Ivy Bridge" + select X86_P6_NOP + help + + Select this for 3rd Gen Core processors in the Ivy Bridge family. + + Enables -march=ivybridge + +config MHASWELL + bool "Intel Haswell" + select X86_P6_NOP + help + + Select this for 4th Gen Core processors in the Haswell family. + + Enables -march=haswell + +config MBROADWELL + bool "Intel Broadwell" + select X86_P6_NOP + help + + Select this for 5th Gen Core processors in the Broadwell family. + + Enables -march=broadwell + +config MSKYLAKE + bool "Intel Skylake" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake family. + + Enables -march=skylake + +config MSKYLAKEX + bool "Intel Skylake X" + select X86_P6_NOP + help + + Select this for 6th Gen Core processors in the Skylake X family. + + Enables -march=skylake-avx512 + +config MCANNONLAKE + bool "Intel Cannon Lake" + select X86_P6_NOP + help + + Select this for 8th Gen Core processors + + Enables -march=cannonlake + +config MICELAKE + bool "Intel Ice Lake" + select X86_P6_NOP + help + + Select this for 10th Gen Core processors in the Ice Lake family. + + Enables -march=icelake-client + +config MCASCADELAKE + bool "Intel Cascade Lake" + select X86_P6_NOP + help + + Select this for Xeon processors in the Cascade Lake family. + + Enables -march=cascadelake + +config MCOOPERLAKE + bool "Intel Cooper Lake" + depends on GCC_VERSION > 100100 + select X86_P6_NOP + help + + Select this for Xeon processors in the Cooper Lake family. + + Enables -march=cooperlake + +config MTIGERLAKE + bool "Intel Tiger Lake" + depends on GCC_VERSION > 100100 + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Tiger Lake family. + + Enables -march=tigerlake + +config MSAPPHIRERAPIDS + bool "Intel Sapphire Rapids" + depends on GCC_VERSION > 110000 + select X86_P6_NOP + help + + Select this for third-generation 10 nm process processors in the Sapphire Rapids family. + + Enables -march=sapphirerapids + +config MALDERLAKE + bool "Intel Alder Lake" + depends on GCC_VERSION > 110000 + select X86_P6_NOP + help + + Select this for twelfth-generation processors in the Alder Lake family. + + Enables -march=alderlake + config GENERIC_CPU bool "Generic-x86-64" depends on X86_64 @@ -294,6 +548,50 @@ config GENERIC_CPU Generic x86-64 CPU. Run equally well on all x86-64 CPUs. +config GENERIC_CPU2 + bool "Generic-x86-64-v2" + depends on GCC_VERSION > 110000 + depends on X86_64 + help + Generic x86-64 CPU. + Run equally well on all x86-64 CPUs with min support of x86-64-v2. + +config GENERIC_CPU3 + bool "Generic-x86-64-v3" + depends on GCC_VERSION > 110000 + depends on X86_64 + help + Generic x86-64-v3 CPU with v3 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v3. + +config GENERIC_CPU4 + bool "Generic-x86-64-v4" + depends on GCC_VERSION > 110000 + depends on X86_64 + help + Generic x86-64 CPU with v4 instructions. + Run equally well on all x86-64 CPUs with min support of x86-64-v4. + +config MNATIVE_INTEL + bool "Intel-Native optimizations autodetected by GCC" + help + + GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for AMD CPUs. Intel Only! + + Enables -march=native + +config MNATIVE_AMD + bool "AMD-Native optimizations autodetected by GCC" + help + + GCC 4.2 and above support -march=native, which automatically detects + the optimum settings to use based on your processor. Do NOT use this + for Intel CPUs. AMD Only! + + Enables -march=native + endchoice config X86_GENERIC @@ -318,7 +616,7 @@ config X86_INTERNODE_CACHE_SHIFT config X86_L1_CACHE_SHIFT int default "7" if MPENTIUM4 || MPSC - default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU + default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD || X86_GENERIC || GENERIC_CPU || GENERIC_CPU2 || GENERIC_CPU3 || GENERIC_CPU4 default "4" if MELAN || M486SX || M486 || MGEODEGX1 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX @@ -336,11 +634,11 @@ config X86_ALIGNMENT_16 config X86_INTEL_USERCOPY def_bool y - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL config X86_USE_PPRO_CHECKSUM def_bool y - depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM + depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MVIAC7 || MEFFICEON || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD config X86_USE_3DNOW def_bool y @@ -360,26 +658,26 @@ config X86_USE_3DNOW config X86_P6_NOP def_bool y depends on X86_64 - depends on (MCORE2 || MPENTIUM4 || MPSC) + depends on (MCORE2 || MPENTIUM4 || MPSC || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL) config X86_TSC def_bool y - depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64 + depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) || X86_64 config X86_CMPXCHG64 def_bool y - depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 + depends on X86_PAE || X86_64 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586TSC || M586MMX || MATOM || MGEODE_LX || MGEODEGX1 || MK6 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD # this should be set for all -march=.. options where the compiler # generates cmov. config X86_CMOV def_bool y - depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX) + depends on (MK8 || MK7 || MCORE2 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MCRUSOE || MEFFICEON || X86_64 || MATOM || MGEODE_LX || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) config X86_MINIMUM_CPU_FAMILY int default "64" if X86_64 - default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8) + default "6" if X86_32 && (MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7 || MEFFICEON || MATOM || MCRUSOE || MCORE2 || MK7 || MK8 || MK8SSE3 || MK10 || MBARCELONA || MBOBCAT || MJAGUAR || MBULLDOZER || MPILEDRIVER || MSTEAMROLLER || MEXCAVATOR || MZEN || MZEN2 || MZEN3 || MNEHALEM || MWESTMERE || MSILVERMONT || MGOLDMONT || MGOLDMONTPLUS || MSANDYBRIDGE || MIVYBRIDGE || MHASWELL || MBROADWELL || MSKYLAKE || MSKYLAKEX || MCANNONLAKE || MICELAKE || MCASCADELAKE || MCOOPERLAKE || MTIGERLAKE || MSAPPHIRERAPIDS || MALDERLAKE || MNATIVE_INTEL || MNATIVE_AMD) default "5" if X86_32 && X86_CMPXCHG64 default "4" diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 30920d70b48b..c5416da97034 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -113,11 +113,47 @@ else # FIXME - should be integrated in Makefile.cpu (Makefile_32.cpu) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) - - cflags-$(CONFIG_MCORE2) += \ - $(call cc-option,-march=core2,$(call cc-option,-mtune=generic)) - cflags-$(CONFIG_MATOM) += $(call cc-option,-march=atom) \ - $(call cc-option,-mtune=atom,$(call cc-option,-mtune=generic)) + cflags-$(CONFIG_MK8SSE3) += $(call cc-option,-march=k8-sse3) + cflags-$(CONFIG_MK10) += $(call cc-option,-march=amdfam10) + cflags-$(CONFIG_MBARCELONA) += $(call cc-option,-march=barcelona) + cflags-$(CONFIG_MBOBCAT) += $(call cc-option,-march=btver1) + cflags-$(CONFIG_MJAGUAR) += $(call cc-option,-march=btver2) + cflags-$(CONFIG_MBULLDOZER) += $(call cc-option,-march=bdver1) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-march=bdver2) + cflags-$(CONFIG_MPILEDRIVER) += $(call cc-option,-mno-tbm) + cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-march=bdver3) + cflags-$(CONFIG_MSTEAMROLLER) += $(call cc-option,-mno-tbm) + cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-march=bdver4) + cflags-$(CONFIG_MEXCAVATOR) += $(call cc-option,-mno-tbm) + cflags-$(CONFIG_MZEN) += $(call cc-option,-march=znver1) + cflags-$(CONFIG_MZEN2) += $(call cc-option,-march=znver2) + cflags-$(CONFIG_MZEN3) += $(call cc-option,-march=znver3) + + cflags-$(CONFIG_MNATIVE_INTEL) += $(call cc-option,-march=native) + cflags-$(CONFIG_MNATIVE_AMD) += $(call cc-option,-march=native) + cflags-$(CONFIG_MATOM) += $(call cc-option,-march=bonnell) + cflags-$(CONFIG_MCORE2) += $(call cc-option,-march=core2) + cflags-$(CONFIG_MNEHALEM) += $(call cc-option,-march=nehalem) + cflags-$(CONFIG_MWESTMERE) += $(call cc-option,-march=westmere) + cflags-$(CONFIG_MSILVERMONT) += $(call cc-option,-march=silvermont) + cflags-$(CONFIG_MGOLDMONT) += $(call cc-option,-march=goldmont) + cflags-$(CONFIG_MGOLDMONTPLUS) += $(call cc-option,-march=goldmont-plus) + cflags-$(CONFIG_MSANDYBRIDGE) += $(call cc-option,-march=sandybridge) + cflags-$(CONFIG_MIVYBRIDGE) += $(call cc-option,-march=ivybridge) + cflags-$(CONFIG_MHASWELL) += $(call cc-option,-march=haswell) + cflags-$(CONFIG_MBROADWELL) += $(call cc-option,-march=broadwell) + cflags-$(CONFIG_MSKYLAKE) += $(call cc-option,-march=skylake) + cflags-$(CONFIG_MSKYLAKEX) += $(call cc-option,-march=skylake-avx512) + cflags-$(CONFIG_MCANNONLAKE) += $(call cc-option,-march=cannonlake) + cflags-$(CONFIG_MICELAKE) += $(call cc-option,-march=icelake-client) + cflags-$(CONFIG_MCASCADELAKE) += $(call cc-option,-march=cascadelake) + cflags-$(CONFIG_MCOOPERLAKE) += $(call cc-option,-march=cooperlake) + cflags-$(CONFIG_MTIGERLAKE) += $(call cc-option,-march=tigerlake) + cflags-$(CONFIG_MSAPPHIRERAPIDS) += $(call cc-option,-march=sapphirerapids) + cflags-$(CONFIG_MALDERLAKE) += $(call cc-option,-march=alderlake) + cflags-$(CONFIG_GENERIC_CPU2) += $(call cc-option,-march=x86-64-v2) + cflags-$(CONFIG_GENERIC_CPU3) += $(call cc-option,-march=x86-64-v3) + cflags-$(CONFIG_GENERIC_CPU4) += $(call cc-option,-march=x86-64-v4) cflags-$(CONFIG_GENERIC_CPU) += $(call cc-option,-mtune=generic) KBUILD_CFLAGS += $(cflags-y) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d1436c37008b..57aef3f5a81e 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2715,25 +2715,18 @@ SYM_FUNC_END(aesni_ctr_enc) pxor CTR, IV; /* - * void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *dst, - * const u8 *src, bool enc, le128 *iv) + * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) */ -SYM_FUNC_START(aesni_xts_crypt8) +SYM_FUNC_START(aesni_xts_encrypt) FRAME_BEGIN - testb %cl, %cl - movl $0, %ecx - movl $240, %r10d - leaq _aesni_enc4, %r11 - leaq _aesni_dec4, %rax - cmovel %r10d, %ecx - cmoveq %rax, %r11 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK movups (IVP), IV mov 480(KEYP), KLEN - addq %rcx, KEYP +.Lxts_enc_loop4: movdqa IV, STATE1 movdqu 0x00(INP), INC pxor INC, STATE1 @@ -2757,71 +2750,103 @@ SYM_FUNC_START(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - CALL_NOSPEC r11 + call _aesni_enc4 movdqu 0x00(OUTP), INC pxor INC, STATE1 movdqu STATE1, 0x00(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE1 - movdqu 0x40(INP), INC - pxor INC, STATE1 - movdqu IV, 0x40(OUTP) - movdqu 0x10(OUTP), INC pxor INC, STATE2 movdqu STATE2, 0x10(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE2 - movdqu 0x50(INP), INC - pxor INC, STATE2 - movdqu IV, 0x50(OUTP) - movdqu 0x20(OUTP), INC pxor INC, STATE3 movdqu STATE3, 0x20(OUTP) - _aesni_gf128mul_x_ble() - movdqa IV, STATE3 - movdqu 0x60(INP), INC - pxor INC, STATE3 - movdqu IV, 0x60(OUTP) - movdqu 0x30(OUTP), INC pxor INC, STATE4 movdqu STATE4, 0x30(OUTP) _aesni_gf128mul_x_ble() - movdqa IV, STATE4 - movdqu 0x70(INP), INC - pxor INC, STATE4 - movdqu IV, 0x70(OUTP) - _aesni_gf128mul_x_ble() + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_enc_loop4 + movups IV, (IVP) - CALL_NOSPEC r11 + FRAME_END + ret +SYM_FUNC_END(aesni_xts_encrypt) + +/* + * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst, + * const u8 *src, unsigned int len, le128 *iv) + */ +SYM_FUNC_START(aesni_xts_decrypt) + FRAME_BEGIN + + movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK + movups (IVP), IV + + mov 480(KEYP), KLEN + add $240, KEYP - movdqu 0x40(OUTP), INC +.Lxts_dec_loop4: + movdqa IV, STATE1 + movdqu 0x00(INP), INC pxor INC, STATE1 - movdqu STATE1, 0x40(OUTP) + movdqu IV, 0x00(OUTP) - movdqu 0x50(OUTP), INC + _aesni_gf128mul_x_ble() + movdqa IV, STATE2 + movdqu 0x10(INP), INC + pxor INC, STATE2 + movdqu IV, 0x10(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE3 + movdqu 0x20(INP), INC + pxor INC, STATE3 + movdqu IV, 0x20(OUTP) + + _aesni_gf128mul_x_ble() + movdqa IV, STATE4 + movdqu 0x30(INP), INC + pxor INC, STATE4 + movdqu IV, 0x30(OUTP) + + call _aesni_dec4 + + movdqu 0x00(OUTP), INC + pxor INC, STATE1 + movdqu STATE1, 0x00(OUTP) + + movdqu 0x10(OUTP), INC pxor INC, STATE2 - movdqu STATE2, 0x50(OUTP) + movdqu STATE2, 0x10(OUTP) - movdqu 0x60(OUTP), INC + movdqu 0x20(OUTP), INC pxor INC, STATE3 - movdqu STATE3, 0x60(OUTP) + movdqu STATE3, 0x20(OUTP) - movdqu 0x70(OUTP), INC + movdqu 0x30(OUTP), INC pxor INC, STATE4 - movdqu STATE4, 0x70(OUTP) + movdqu STATE4, 0x30(OUTP) + + _aesni_gf128mul_x_ble() + + add $64, INP + add $64, OUTP + sub $64, LEN + ja .Lxts_dec_loop4 + + movups IV, (IVP) FRAME_END ret -SYM_FUNC_END(aesni_xts_crypt8) +SYM_FUNC_END(aesni_xts_decrypt) #endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index ad8a7188a2bf..be891fdf8d17 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -97,6 +97,12 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #define AVX_GEN2_OPTSIZE 640 #define AVX_GEN4_OPTSIZE 4096 +asmlinkage void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + +asmlinkage void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, + const u8 *in, unsigned int len, u8 *iv); + #ifdef CONFIG_X86_64 static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, @@ -104,9 +110,6 @@ static void (*aesni_ctr_enc_tfm)(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -asmlinkage void aesni_xts_crypt8(const struct crypto_aes_ctx *ctx, u8 *out, - const u8 *in, bool enc, le128 *iv); - /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. * struct gcm_context_data. May be uninitialized. @@ -547,14 +550,14 @@ static void aesni_xts_dec(const void *ctx, u8 *dst, const u8 *src, le128 *iv) glue_xts_crypt_128bit_one(ctx, dst, src, iv, aesni_dec); } -static void aesni_xts_enc8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) +static void aesni_xts_enc32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, dst, src, true, iv); + aesni_xts_encrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } -static void aesni_xts_dec8(const void *ctx, u8 *dst, const u8 *src, le128 *iv) +static void aesni_xts_dec32(const void *ctx, u8 *dst, const u8 *src, le128 *iv) { - aesni_xts_crypt8(ctx, dst, src, false, iv); + aesni_xts_decrypt(ctx, dst, src, 32 * AES_BLOCK_SIZE, (u8 *)iv); } static const struct common_glue_ctx aesni_enc_xts = { @@ -562,8 +565,8 @@ static const struct common_glue_ctx aesni_enc_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = aesni_xts_enc8 } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_enc32 } }, { .num_blocks = 1, .fn_u = { .xts = aesni_xts_enc } @@ -575,8 +578,8 @@ static const struct common_glue_ctx aesni_dec_xts = { .fpu_blocks_limit = 1, .funcs = { { - .num_blocks = 8, - .fn_u = { .xts = aesni_xts_dec8 } + .num_blocks = 32, + .fn_u = { .xts = aesni_xts_dec32 } }, { .num_blocks = 1, .fn_u = { .xts = aesni_xts_dec } @@ -686,7 +689,8 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned long auth_tag_len = crypto_aead_authsize(tfm); const struct aesni_gcm_tfm_s *gcm_tfm = aesni_gcm_tfm; - struct gcm_context_data data AESNI_ALIGN_ATTR; + u8 databuf[sizeof(struct gcm_context_data) + (AESNI_ALIGN - 8)] __aligned(8); + struct gcm_context_data *data = PTR_ALIGN((void *)databuf, AESNI_ALIGN); struct scatter_walk dst_sg_walk = {}; unsigned long left = req->cryptlen; unsigned long len, srclen, dstlen; @@ -735,8 +739,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, } kernel_fpu_begin(); - gcm_tfm->init(aes_ctx, &data, iv, - hash_subkey, assoc, assoclen); + gcm_tfm->init(aes_ctx, data, iv, hash_subkey, assoc, assoclen); if (req->src != req->dst) { while (left) { src = scatterwalk_map(&src_sg_walk); @@ -746,10 +749,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = min(srclen, dstlen); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, dst, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, dst, src, len); } left -= len; @@ -767,10 +770,10 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, len = scatterwalk_clamp(&src_sg_walk, left); if (len) { if (enc) - gcm_tfm->enc_update(aes_ctx, &data, + gcm_tfm->enc_update(aes_ctx, data, src, src, len); else - gcm_tfm->dec_update(aes_ctx, &data, + gcm_tfm->dec_update(aes_ctx, data, src, src, len); } left -= len; @@ -779,7 +782,7 @@ static int gcmaes_crypt_by_sg(bool enc, struct aead_request *req, scatterwalk_done(&src_sg_walk, 1, left); } } - gcm_tfm->finalize(aes_ctx, &data, authTag, auth_tag_len); + gcm_tfm->finalize(aes_ctx, data, authTag, auth_tag_len); kernel_fpu_end(); if (!assocmem) @@ -828,7 +831,8 @@ static int helper_rfc4106_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; __be32 counter = cpu_to_be32(1); @@ -855,7 +859,8 @@ static int helper_rfc4106_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); unsigned int i; if (unlikely(req->assoclen != 16 && req->assoclen != 20)) @@ -985,7 +990,8 @@ static int generic_gcmaes_encrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); __be32 counter = cpu_to_be32(1); memcpy(iv, req->iv, 12); @@ -1001,7 +1007,8 @@ static int generic_gcmaes_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); void *aes_ctx = &(ctx->aes_key_expanded); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + u8 ivbuf[16 + (AESNI_ALIGN - 8)] __aligned(8); + u8 *iv = PTR_ALIGN(&ivbuf[0], AESNI_ALIGN); memcpy(iv, req->iv, 12); *((__be32 *)(iv+12)) = counter; diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 0904f5676e4d..2e4d91f3feea 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) regs->ax = -EFAULT; instrumentation_end(); - syscall_exit_to_user_mode(regs); + local_irq_disable(); + irqentry_exit_to_user_mode(regs); return false; } @@ -270,7 +271,7 @@ __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs) instrumentation_begin(); run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs); - instrumentation_begin(); + instrumentation_end(); set_irq_regs(old_regs); diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 541fdaf64045..0051cf5c792d 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat) /* Switch to the kernel stack */ movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp +SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) + /* Construct struct pt_regs on stack */ pushq $__USER32_DS /* pt_regs->ss */ pushq %r8 /* pt_regs->sp */ diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index d4569bfa83e3..bfd42e0853ed 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3575,11 +3575,16 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & - ~intel_pmu_large_pebs_flags(event))) + ~intel_pmu_large_pebs_flags(event))) { event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS; + event->attach_state |= PERF_ATTACH_SCHED_CB; + } } if (x86_pmu.pebs_aliases) x86_pmu.pebs_aliases(event); @@ -3592,6 +3597,7 @@ static int intel_pmu_hw_config(struct perf_event *event) ret = intel_pmu_setup_lbr_filter(event); if (ret) return ret; + event->attach_state |= PERF_ATTACH_SCHED_CB; /* * BTS is set up earlier in this path, so don't account twice @@ -4397,6 +4403,9 @@ static const struct x86_cpu_desc isolation_ucodes[] = { INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X, 2, 0x0b000014), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 3, 0x00000021), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 4, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 5, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 6, 0x00000000), + INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X, 7, 0x00000000), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE, 3, 0x0000007c), INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE, 9, 0x0000004e), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 67dbc91bccfe..6e84e79bea72 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1899,7 +1899,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); diff --git a/arch/x86/include/asm/insn-eval.h b/arch/x86/include/asm/insn-eval.h index a0f839aa144d..98b4dae5e8bc 100644 --- a/arch/x86/include/asm/insn-eval.h +++ b/arch/x86/include/asm/insn-eval.h @@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx); int insn_get_code_seg_params(struct pt_regs *regs); int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]); +int insn_fetch_from_user_inatomic(struct pt_regs *regs, + unsigned char buf[MAX_INSN_SIZE]); bool insn_decode(struct insn *insn, struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE], int buf_size); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3d6616f6f6ef..e0cfd620b293 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -894,6 +894,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + #define APICV_INHIBIT_REASON_DISABLE 0 #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 @@ -989,14 +995,12 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + bool bus_lock_detection_enabled; + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; - struct { - u8 count; - bool default_allow:1; - struct msr_bitmap_range ranges[16]; - } msr_filter; + struct kvm_x86_msr_filter __rcu *msr_filter; struct kvm_pmu_event_filter *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c20a52b5534b..c66df6368909 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -552,15 +552,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, *size = fpu_kernel_xstate_size; } -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ - static inline void native_load_sp0(unsigned long sp0) { diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 2c35f1c01a2d..b6a9d51d1d79 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void); void entry_SYSENTER_compat(void); void __end_entry_SYSENTER_compat(void); void entry_SYSCALL_compat(void); +void entry_SYSCALL_compat_safe_stack(void); void entry_INT80_compat(void); #ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index d8324a236696..409f661481e1 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -94,6 +94,8 @@ struct pt_regs { #include #endif +#include + struct cpuinfo_x86; struct task_struct; @@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs) #ifdef CONFIG_X86_64 #define current_user_stack_pointer() current_pt_regs()->sp #define compat_user_stack_pointer() current_pt_regs()->sp + +static inline bool ip_within_syscall_gap(struct pt_regs *regs) +{ + bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 && + regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack); + +#ifdef CONFIG_IA32_EMULATION + ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat && + regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack); +#endif + + return ret; +} #endif static inline unsigned long kernel_stack_pointer(struct pt_regs *regs) diff --git a/arch/x86/include/asm/static_call.h b/arch/x86/include/asm/static_call.h index c37f11999d0c..cbb67b6030f9 100644 --- a/arch/x86/include/asm/static_call.h +++ b/arch/x86/include/asm/static_call.h @@ -37,4 +37,11 @@ #define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; nop; nop; nop; nop") + +#define ARCH_ADD_TRAMP_KEY(name) \ + asm(".pushsection .static_call_tramp_key, \"a\" \n" \ + ".long " STATIC_CALL_TRAMP_STR(name) " - . \n" \ + ".long " STATIC_CALL_KEY_STR(name) " - . \n" \ + ".popsection \n") + #endif /* _ASM_STATIC_CALL_H */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0d751d5da702..30d1d187019f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -205,10 +205,31 @@ static inline int arch_within_stack_frames(const void * const stack, #endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + +#ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ +#define TS_COMPAT_RESTART 0x0008 + +#define arch_set_restart_data arch_set_restart_data + +static inline void arch_set_restart_data(struct restart_block *restart) +{ + struct thread_info *ti = current_thread_info(); + if (ti->status & TS_COMPAT) + ti->status |= TS_COMPAT_RESTART; + else + ti->status &= ~TS_COMPAT_RESTART; +} #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true diff --git a/arch/x86/include/asm/vermagic.h b/arch/x86/include/asm/vermagic.h index 75884d2cdec3..47cc383d41d4 100644 --- a/arch/x86/include/asm/vermagic.h +++ b/arch/x86/include/asm/vermagic.h @@ -17,6 +17,46 @@ #define MODULE_PROC_FAMILY "586MMX " #elif defined CONFIG_MCORE2 #define MODULE_PROC_FAMILY "CORE2 " +#elif defined CONFIG_MNATIVE_INTEL +#define MODULE_PROC_FAMILY "NATIVE_INTEL " +#elif defined CONFIG_MNATIVE_AMD +#define MODULE_PROC_FAMILY "NATIVE_AMD " +#elif defined CONFIG_MNEHALEM +#define MODULE_PROC_FAMILY "NEHALEM " +#elif defined CONFIG_MWESTMERE +#define MODULE_PROC_FAMILY "WESTMERE " +#elif defined CONFIG_MSILVERMONT +#define MODULE_PROC_FAMILY "SILVERMONT " +#elif defined CONFIG_MGOLDMONT +#define MODULE_PROC_FAMILY "GOLDMONT " +#elif defined CONFIG_MGOLDMONTPLUS +#define MODULE_PROC_FAMILY "GOLDMONTPLUS " +#elif defined CONFIG_MSANDYBRIDGE +#define MODULE_PROC_FAMILY "SANDYBRIDGE " +#elif defined CONFIG_MIVYBRIDGE +#define MODULE_PROC_FAMILY "IVYBRIDGE " +#elif defined CONFIG_MHASWELL +#define MODULE_PROC_FAMILY "HASWELL " +#elif defined CONFIG_MBROADWELL +#define MODULE_PROC_FAMILY "BROADWELL " +#elif defined CONFIG_MSKYLAKE +#define MODULE_PROC_FAMILY "SKYLAKE " +#elif defined CONFIG_MSKYLAKEX +#define MODULE_PROC_FAMILY "SKYLAKEX " +#elif defined CONFIG_MCANNONLAKE +#define MODULE_PROC_FAMILY "CANNONLAKE " +#elif defined CONFIG_MICELAKE +#define MODULE_PROC_FAMILY "ICELAKE " +#elif defined CONFIG_MCASCADELAKE +#define MODULE_PROC_FAMILY "CASCADELAKE " +#elif defined CONFIG_MCOOPERLAKE +#define MODULE_PROC_FAMILY "COOPERLAKE " +#elif defined CONFIG_MTIGERLAKE +#define MODULE_PROC_FAMILY "TIGERLAKE " +#elif defined CONFIG_MSAPPHIRERAPIDS +#define MODULE_PROC_FAMILY "SAPPHIRERAPIDS " +#elif defined CONFIG_MALDERLAKE +#define MODULE_PROC_FAMILY "ALDERLAKE " #elif defined CONFIG_MATOM #define MODULE_PROC_FAMILY "ATOM " #elif defined CONFIG_M686 @@ -35,6 +75,30 @@ #define MODULE_PROC_FAMILY "K7 " #elif defined CONFIG_MK8 #define MODULE_PROC_FAMILY "K8 " +#elif defined CONFIG_MK8SSE3 +#define MODULE_PROC_FAMILY "K8SSE3 " +#elif defined CONFIG_MK10 +#define MODULE_PROC_FAMILY "K10 " +#elif defined CONFIG_MBARCELONA +#define MODULE_PROC_FAMILY "BARCELONA " +#elif defined CONFIG_MBOBCAT +#define MODULE_PROC_FAMILY "BOBCAT " +#elif defined CONFIG_MBULLDOZER +#define MODULE_PROC_FAMILY "BULLDOZER " +#elif defined CONFIG_MPILEDRIVER +#define MODULE_PROC_FAMILY "PILEDRIVER " +#elif defined CONFIG_MSTEAMROLLER +#define MODULE_PROC_FAMILY "STEAMROLLER " +#elif defined CONFIG_MJAGUAR +#define MODULE_PROC_FAMILY "JAGUAR " +#elif defined CONFIG_MEXCAVATOR +#define MODULE_PROC_FAMILY "EXCAVATOR " +#elif defined CONFIG_MZEN +#define MODULE_PROC_FAMILY "ZEN " +#elif defined CONFIG_MZEN2 +#define MODULE_PROC_FAMILY "ZEN2 " +#elif defined CONFIG_MZEN3 +#define MODULE_PROC_FAMILY "ZEN3 " #elif defined CONFIG_MELAN #define MODULE_PROC_FAMILY "ELAN " #elif defined CONFIG_MCRUSOE diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index 9aad0e0876fb..fda3e7747c22 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -30,15 +30,22 @@ static inline int cpu_has_vmx(void) } -/** Disable VMX on the current CPU +/** + * cpu_vmxoff() - Disable VMX on the current CPU * - * vmxoff causes a undefined-opcode exception if vmxon was not run - * on the CPU previously. Only call this function if you know VMX - * is enabled. + * Disable VMX and clear CR4.VMXE (even if VMXOFF faults) + * + * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to + * atomically track post-VMXON state, e.g. this may be called in NMI context. + * Eat all faults as all other faults on VMXOFF faults are mode related, i.e. + * faults are guaranteed to be due to the !post-VMXON check unless the CPU is + * magically in RM, VM86, compat mode, or at CPL>0. */ static inline void cpu_vmxoff(void) { - asm volatile ("vmxoff"); + asm_volatile_goto("1: vmxoff\n\t" + _ASM_EXTABLE(1b, %l[fault]) :::: fault); +fault: cr4_clear_bits(X86_CR4_VMXE); } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 7f4c081f59f0..2745c24453f2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2334,6 +2334,11 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_apicid[cpu]; +} + #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e4ab4804b20d..04ef995d1200 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); diff --git a/arch/x86/kernel/cpu/sgx/main.c b/arch/x86/kernel/cpu/sgx/main.c index c519fc5f6948..8df81a3ed945 100644 --- a/arch/x86/kernel/cpu/sgx/main.c +++ b/arch/x86/kernel/cpu/sgx/main.c @@ -700,25 +700,27 @@ static bool __init sgx_page_cache_init(void) return true; } -static void __init sgx_init(void) +static int __init sgx_init(void) { int ret; int i; if (!cpu_feature_enabled(X86_FEATURE_SGX)) - return; + return -ENODEV; if (!sgx_page_cache_init()) - return; + return -ENOMEM; - if (!sgx_page_reclaimer_init()) + if (!sgx_page_reclaimer_init()) { + ret = -ENOMEM; goto err_page_cache; + } ret = sgx_drv_init(); if (ret) goto err_kthread; - return; + return 0; err_kthread: kthread_stop(ksgxd_tsk); @@ -728,6 +730,8 @@ static void __init sgx_init(void) vfree(sgx_epc_sections[i].pages); memunmap(sgx_epc_sections[i].virt_addr); } + + return ret; } device_initcall(sgx_init); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index aa593743acf6..1fc0962c89c0 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void) static int __init kvm_setup_vsyscall_timeinfo(void) { -#ifdef CONFIG_X86_64 - u8 flags; + kvmclock_init_mem(); - if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall) - return 0; +#ifdef CONFIG_X86_64 + if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) { + u8 flags; - flags = pvclock_read_flags(&hv_clock_boot[0].pvti); - if (!(flags & PVCLOCK_TSC_STABLE_BIT)) - return 0; + flags = pvclock_read_flags(&hv_clock_boot[0].pvti); + if (!(flags & PVCLOCK_TSC_STABLE_BIT)) + return 0; - kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; + } #endif - kvmclock_init_mem(); - return 0; } early_initcall(kvm_setup_vsyscall_timeinfo); diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 34b153cbd4ac..5e9a34b5bd74 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -114,6 +114,7 @@ int apply_relocate(Elf32_Shdr *sechdrs, *location += sym->st_value; break; case R_386_PC32: + case R_386_PLT32: /* Add the value, subtract its position */ *location += sym->st_value - (uint32_t)location; break; diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 8a67d1fa8dc5..ed8ac6bcbafb 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c @@ -182,6 +182,13 @@ static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) err = security_locked_down(LOCKDOWN_MSR); if (err) break; + + err = filter_write(regs[1]); + if (err) + return err; + + add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK); + err = wrmsr_safe_regs_on_cpu(cpu, regs); if (err) break; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index db115943e8bd..b29657b76e3f 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -477,6 +477,15 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = { }, }, + { /* PCIe Wifi card isn't detected after reboot otherwise */ + .callback = set_pci_reboot, + .ident = "Zotac ZBOX CI327 nano", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NA"), + DMI_MATCH(DMI_PRODUCT_NAME, "ZBOX-CI327NANO-GS-01"), + }, + }, + /* Sony */ { /* Handle problems with rebooting on Sony VGN-Z540N */ .callback = set_bios_reboot, @@ -538,31 +547,21 @@ static void emergency_vmx_disable_all(void) local_irq_disable(); /* - * We need to disable VMX on all CPUs before rebooting, otherwise - * we risk hanging up the machine, because the CPU ignores INIT - * signals when VMX is enabled. - * - * We can't take any locks and we may be on an inconsistent - * state, so we use NMIs as IPIs to tell the other CPUs to disable - * VMX and halt. + * Disable VMX on all CPUs before rebooting, otherwise we risk hanging + * the machine, because the CPU blocks INIT when it's in VMX root. * - * For safety, we will avoid running the nmi_shootdown_cpus() - * stuff unnecessarily, but we don't have a way to check - * if other CPUs have VMX enabled. So we will call it only if the - * CPU we are running on has VMX enabled. + * We can't take any locks and we may be on an inconsistent state, so + * use NMIs as IPIs to tell the other CPUs to exit VMX root and halt. * - * We will miss cases where VMX is not enabled on all CPUs. This - * shouldn't do much harm because KVM always enable VMX on all - * CPUs anyway. But we can miss it on the small window where KVM - * is still enabling VMX. + * Do the NMI shootdown even if VMX if off on _this_ CPU, as that + * doesn't prevent a different CPU from being in VMX root operation. */ - if (cpu_has_vmx() && cpu_vmx_enabled()) { - /* Disable VMX on this CPU. */ - cpu_vmxoff(); + if (cpu_has_vmx()) { + /* Safely force _this_ CPU out of VMX root operation. */ + __cpu_emergency_vmxoff(); - /* Halt and disable VMX on the other CPUs */ + /* Halt and exit VMX root operation on the other CPUs. */ nmi_shootdown_cpus(vmxoff_nmi); - } } diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c index 84c1821819af..04a780abb512 100644 --- a/arch/x86/kernel/sev-es.c +++ b/arch/x86/kernel/sev-es.c @@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu) cea_set_pte((void *)vaddr, pa, PAGE_KERNEL); } -static __always_inline bool on_vc_stack(unsigned long sp) +static __always_inline bool on_vc_stack(struct pt_regs *regs) { + unsigned long sp = regs->sp; + + /* User-mode RSP is not trusted */ + if (user_mode(regs)) + return false; + + /* SYSCALL gap still has user-mode RSP */ + if (ip_within_syscall_gap(regs)) + return false; + return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC))); } @@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs) old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]); /* Make room on the IST stack */ - if (on_vc_stack(regs->sp)) + if (on_vc_stack(regs)) new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist); else new_ist = old_ist - sizeof(old_ist); @@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt) int res; if (user_mode(ctxt->regs)) { - res = insn_fetch_from_user(ctxt->regs, buffer); + res = insn_fetch_from_user_inatomic(ctxt->regs, buffer); if (!res) { ctxt->fi.vector = X86_TRAP_PF; ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER; @@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs) DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) { struct sev_es_runtime_data *data = this_cpu_read(runtime_data); + irqentry_state_t irq_state; struct ghcb_state state; struct es_em_ctxt ctxt; enum es_result result; struct ghcb *ghcb; - lockdep_assert_irqs_disabled(); - /* * Handle #DB before calling into !noinstr code to avoid recursive #DB. */ @@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) return; } + irq_state = irqentry_nmi_enter(regs); + lockdep_assert_irqs_disabled(); instrumentation_begin(); /* @@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication) out: instrumentation_end(); + irqentry_nmi_exit(regs, irq_state); return; diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ea794a083c44..6c26d2c3a2e4 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { - /* - * This function is fundamentally broken as currently - * implemented. - * - * The idea is that we want to trigger a call to the - * restart_block() syscall and that we want in_ia32_syscall(), - * in_x32_syscall(), etc. to match whatever they were in the - * syscall being restarted. We assume that the syscall - * instruction at (regs->ip - 2) matches whatever syscall - * instruction we used to enter in the first place. - * - * The problem is that we can get here when ptrace pokes - * syscall-like values into regs even if we're not in a syscall - * at all. - * - * For now, we maintain historical behavior and guess based on - * stored state. We could do better by saving the actual - * syscall arch in restart_block or (with caveats on x32) by - * checking if regs->ip points to 'int $0x80'. The current - * behavior is incorrect if a tracer has a different bitness - * than the tracee. - */ #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current_thread_info()->status & TS_COMPAT_RESTART) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 7f5aec758f0e..ac1874a2a70e 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * In the SYSCALL entry path the RSP value comes from user-space - don't * trust it and switch to the current kernel stack */ - if (regs->ip >= (unsigned long)entry_SYSCALL_64 && - regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) { + if (ip_within_syscall_gap(regs)) { sp = this_cpu_read(cpu_current_top_of_stack); goto sync; } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 73f800100066..c451d5f6422f 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr, if (!stack_access_ok(state, addr, sizeof(struct pt_regs))) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr if (!stack_access_ok(state, addr, IRET_FRAME_SIZE)) return false; - *ip = regs->ip; - *sp = regs->sp; + *ip = READ_ONCE_NOCHECK(regs->ip); + *sp = READ_ONCE_NOCHECK(regs->sp); return true; } @@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off, return false; if (state->full_regs) { - *val = ((unsigned long *)state->regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]); return true; } if (state->prev_regs) { - *val = ((unsigned long *)state->prev_regs)[reg]; + *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]); return true; } diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 66a08322988f..1453b9b79442 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -2564,12 +2564,12 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - val = GET_SMSTATE(u32, smstate, 0x7f68); + val = GET_SMSTATE(u64, smstate, 0x7f68); if (ctxt->ops->set_dr(ctxt, 6, (val & DR6_VOLATILE) | DR6_FIXED_1)) return X86EMUL_UNHANDLEABLE; - val = GET_SMSTATE(u32, smstate, 0x7f60); + val = GET_SMSTATE(u64, smstate, 0x7f60); if (ctxt->ops->set_dr(ctxt, 7, (val & DR7_VOLATILE) | DR7_FIXED_1)) return X86EMUL_UNHANDLEABLE; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 43cceadd073e..570fa298083c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1641,7 +1641,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) } if (kvm_use_posted_timer_interrupt(apic->vcpu)) { - kvm_wait_lapic_expire(vcpu); + /* + * Ensure the guest's timer has truly expired before posting an + * interrupt. Open code the relevant checks to avoid querying + * lapic_timer_int_injected(), which will be false since the + * interrupt isn't yet injected. Waiting until after injecting + * is not an option since that won't help a posted interrupt. + */ + if (vcpu->arch.apic->lapic_timer.expired_tscdeadline && + vcpu->arch.apic->lapic_timer.timer_advance_ns) + __kvm_wait_lapic_expire(vcpu); kvm_apic_inject_pending_timer_irqs(apic); return; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 6d16481aa29d..ed861245ecf0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -2417,7 +2417,7 @@ static unsigned long kvm_mmu_zap_oldest_mmu_pages(struct kvm *kvm, return 0; restart: - list_for_each_entry_safe(sp, tmp, &kvm->arch.active_mmu_pages, link) { + list_for_each_entry_safe_reverse(sp, tmp, &kvm->arch.active_mmu_pages, link) { /* * Don't zap active root pages, the page itself can't be freed * and zapping it will just force vCPUs to realloc and reload. diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index bfc6389edc28..cf101b73a360 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -76,12 +76,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) { /* - * When using the EPT page-modification log, the GPAs in the log - * would come from L2 rather than L1. Therefore, we need to rely - * on write protection to record dirty pages. This also bypasses - * PML, since writes now result in a vmexit. + * When using the EPT page-modification log, the GPAs in the CPU dirty + * log would come from L2 rather than L1. Therefore, we need to rely + * on write protection to record dirty pages, which bypasses PML, since + * writes now result in a vmexit. Note, the check on CPU dirty logging + * being enabled is mandatory as the bits used to denote WP-only SPTEs + * are reserved for NPT w/ PAE (32-bit KVM). */ - return vcpu->arch.mmu == &vcpu->arch.guest_mmu; + return vcpu->arch.mmu == &vcpu->arch.guest_mmu && + kvm_x86_ops.cpu_dirty_log_size; } bool is_nx_huge_page_enabled(void); diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index b56d604809b8..17976998bffb 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1067,7 +1067,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm, pfn = spte_to_pfn(iter.old_spte); if (kvm_is_reserved_pfn(pfn) || - !PageTransCompoundMap(pfn_to_page(pfn))) + (!PageCompound(pfn_to_page(pfn)) && + !kvm_is_zone_device_pfn(pfn))) continue; tdp_mmu_set_spte(kvm, &iter, 0); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index db30670dd8c4..d36773c7b535 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -51,6 +51,23 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, nested_svm_vmexit(svm); } +static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault) +{ + struct vcpu_svm *svm = to_svm(vcpu); + WARN_ON(!is_guest_mode(vcpu)); + + if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && + !svm->nested.nested_run_pending) { + svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = fault->error_code; + svm->vmcb->control.exit_info_2 = fault->address; + nested_svm_vmexit(svm); + } else { + kvm_inject_page_fault(vcpu, fault); + } +} + static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); @@ -58,7 +75,7 @@ static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) u64 pdpte; int ret; - ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte, + ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, offset_in_page(cr3) + index * 8, 8); if (ret) return 0; @@ -446,6 +463,9 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa, if (ret) return ret; + if (!npt_enabled) + svm->vcpu.arch.mmu->inject_page_fault = svm_inject_page_fault_nested; + svm_set_gif(svm, true); return 0; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 3442d44ca53b..6a0670548125 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1105,12 +1105,12 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) static void svm_check_invpcid(struct vcpu_svm *svm) { /* - * Intercept INVPCID instruction only if shadow page table is - * enabled. Interception is not required with nested page table - * enabled. + * Intercept INVPCID if shadow paging is enabled to sync/free shadow + * roots, or if INVPCID is disabled in the guest to inject #UD. */ if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) { - if (!npt_enabled) + if (!npt_enabled || + !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID)) svm_set_intercept(svm, INTERCEPT_INVPCID); else svm_clr_intercept(svm, INTERCEPT_INVPCID); @@ -1205,6 +1205,7 @@ static void init_vmcb(struct vcpu_svm *svm) init_sys_seg(&save->ldtr, SEG_TYPE_LDT); init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16); + svm_set_cr4(&svm->vcpu, 0); svm_set_efer(&svm->vcpu, 0); save->dr6 = 0xffff0ff0; kvm_set_rflags(&svm->vcpu, 2); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1b404e4d7dd8..f37f5c1430cf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1523,35 +1523,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { + struct kvm_x86_msr_filter *msr_filter; + struct msr_bitmap_range *ranges; struct kvm *kvm = vcpu->kvm; - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; - u32 count = kvm->arch.msr_filter.count; - u32 i; - bool r = kvm->arch.msr_filter.default_allow; + bool allowed; int idx; + u32 i; - /* MSR filtering not set up or x2APIC enabled, allow everything */ - if (!count || (index >= 0x800 && index <= 0x8ff)) + /* x2APIC MSRs do not support filtering. */ + if (index >= 0x800 && index <= 0x8ff) return true; - /* Prevent collision with set_msr_filter */ idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < count; i++) { + msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu); + if (!msr_filter) { + allowed = true; + goto out; + } + + allowed = msr_filter->default_allow; + ranges = msr_filter->ranges; + + for (i = 0; i < msr_filter->count; i++) { u32 start = ranges[i].base; u32 end = start + ranges[i].nmsrs; u32 flags = ranges[i].flags; unsigned long *bitmap = ranges[i].bitmap; if ((index >= start) && (index < end) && (flags & type)) { - r = !!test_bit(index - start, bitmap); + allowed = !!test_bit(index - start, bitmap); break; } } +out: srcu_read_unlock(&kvm->srcu, idx); - return r; + return allowed; } EXPORT_SYMBOL_GPL(kvm_msr_allowed); @@ -1782,6 +1791,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { + xfer_to_guest_mode_prepare(); return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || xfer_to_guest_mode_work_pending(); } @@ -5314,25 +5324,34 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, return r; } -static void kvm_clear_msr_filter(struct kvm *kvm) +static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow) +{ + struct kvm_x86_msr_filter *msr_filter; + + msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT); + if (!msr_filter) + return NULL; + + msr_filter->default_allow = default_allow; + return msr_filter; +} + +static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) { u32 i; - u32 count = kvm->arch.msr_filter.count; - struct msr_bitmap_range ranges[16]; - mutex_lock(&kvm->lock); - kvm->arch.msr_filter.count = 0; - memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0])); - mutex_unlock(&kvm->lock); - synchronize_srcu(&kvm->srcu); + if (!msr_filter) + return; + + for (i = 0; i < msr_filter->count; i++) + kfree(msr_filter->ranges[i].bitmap); - for (i = 0; i < count; i++) - kfree(ranges[i].bitmap); + kfree(msr_filter); } -static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range) +static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, + struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; @@ -5366,11 +5385,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user goto err; } - /* Everything ok, add this range identifier to our global pool */ - ranges[kvm->arch.msr_filter.count] = range; - /* Make sure we filled the array before we tell anyone to walk it */ - smp_wmb(); - kvm->arch.msr_filter.count++; + /* Everything ok, add this range identifier. */ + msr_filter->ranges[msr_filter->count] = range; + msr_filter->count++; return 0; err: @@ -5381,10 +5398,11 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) { struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_x86_msr_filter *new_filter, *old_filter; struct kvm_msr_filter filter; bool default_allow; - int r = 0; bool empty = true; + int r = 0; u32 i; if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) @@ -5397,25 +5415,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (empty && !default_allow) return -EINVAL; - kvm_clear_msr_filter(kvm); - - kvm->arch.msr_filter.default_allow = default_allow; + new_filter = kvm_alloc_msr_filter(default_allow); + if (!new_filter) + return -ENOMEM; - /* - * Protect from concurrent calls to this function that could trigger - * a TOCTOU violation on kvm->arch.msr_filter.count. - */ - mutex_lock(&kvm->lock); for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { - r = kvm_add_msr_filter(kvm, &filter.ranges[i]); - if (r) - break; + r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); + if (r) { + kvm_free_msr_filter(new_filter); + return r; + } } + mutex_lock(&kvm->lock); + + /* The per-VM filter is protected by kvm->lock... */ + old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1); + + rcu_assign_pointer(kvm->arch.msr_filter, new_filter); + synchronize_srcu(&kvm->srcu); + + kvm_free_msr_filter(old_filter); + kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); mutex_unlock(&kvm->lock); - return r; + return 0; } long kvm_arch_vm_ioctl(struct file *filp, @@ -10535,8 +10560,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - u32 i; - if (current->mm == kvm->mm) { /* * Free memory regions allocated on behalf of userspace, @@ -10553,8 +10576,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } if (kvm_x86_ops.vm_destroy) kvm_x86_ops.vm_destroy(kvm); - for (i = 0; i < kvm->arch.msr_filter.count; i++) - kfree(kvm->arch.msr_filter.ranges[i].bitmap); + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); diff --git a/arch/x86/lib/insn-eval.c b/arch/x86/lib/insn-eval.c index 4229950a5d78..bb0b3fe1e0a0 100644 --- a/arch/x86/lib/insn-eval.c +++ b/arch/x86/lib/insn-eval.c @@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) } } +static unsigned long insn_get_effective_ip(struct pt_regs *regs) +{ + unsigned long seg_base = 0; + + /* + * If not in user-space long mode, a custom code segment could be in + * use. This is true in protected mode (if the process defined a local + * descriptor table), or virtual-8086 mode. In most of the cases + * seg_base will be zero as in USER_CS. + */ + if (!user_64bit_mode(regs)) { + seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); + if (seg_base == -1L) + return 0; + } + + return seg_base + regs->ip; +} + /** * insn_fetch_from_user() - Copy instruction bytes from user-space memory * @regs: Structure with register values as seen when entering kernel mode @@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs) */ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) { - unsigned long seg_base = 0; + unsigned long ip; int not_copied; - /* - * If not in user-space long mode, a custom code segment could be in - * use. This is true in protected mode (if the process defined a local - * descriptor table), or virtual-8086 mode. In most of the cases - * seg_base will be zero as in USER_CS. - */ - if (!user_64bit_mode(regs)) { - seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS); - if (seg_base == -1L) - return 0; - } + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; + + not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE); + return MAX_INSN_SIZE - not_copied; +} + +/** + * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory + * while in atomic code + * @regs: Structure with register values as seen when entering kernel mode + * @buf: Array to store the fetched instruction + * + * Gets the linear address of the instruction and copies the instruction bytes + * to the buf. This function must be used in atomic context. + * + * Returns: + * + * Number of instruction bytes copied. + * + * 0 if nothing was copied. + */ +int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]) +{ + unsigned long ip; + int not_copied; + + ip = insn_get_effective_ip(regs); + if (!ip) + return 0; - not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip), - MAX_INSN_SIZE); + not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE); return MAX_INSN_SIZE - not_copied; } diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index f1f1b5a0956a..441c3e9b8971 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -54,7 +54,7 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr) * 32-bit mode: * * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch. - * Check that here and ignore it. + * Check that here and ignore it. This is AMD erratum #91. * * 64-bit mode: * @@ -83,11 +83,7 @@ check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr, #ifdef CONFIG_X86_64 case 0x40: /* - * In AMD64 long mode 0x40..0x4F are valid REX prefixes - * Need to figure out under what instruction mode the - * instruction was issued. Could check the LDT for lm, - * but for now it's good enough to assume that long - * mode only uses well known segments or kernel. + * In 64-bit mode 0x40..0x4F are valid REX prefixes */ return (!user_mode(regs) || user_64bit_mode(regs)); #endif @@ -127,20 +123,31 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) instr = (void *)convert_ip_to_linear(current, regs); max_instr = instr + 15; - if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX) - return 0; + /* + * This code has historically always bailed out if IP points to a + * not-present page (e.g. due to a race). No one has ever + * complained about this. + */ + pagefault_disable(); while (instr < max_instr) { unsigned char opcode; - if (get_kernel_nofault(opcode, instr)) - break; + if (user_mode(regs)) { + if (get_user(opcode, instr)) + break; + } else { + if (get_kernel_nofault(opcode, instr)) + break; + } instr++; if (!check_prefetch_opcode(regs, instr, opcode, &prefetch)) break; } + + pagefault_enable(); return prefetch; } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index c3d5f0236f35..868fd69814bf 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -262,7 +262,7 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) if (pgprot_val(old_prot) == pgprot_val(new_prot)) return; - pa = pfn << page_level_shift(level); + pa = pfn << PAGE_SHIFT; size = page_level_size(level); /* diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 8f665c352bf0..ca311aaa67b8 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -1164,12 +1164,14 @@ static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + kfree(v); ++*pos; return memtype_get_idx(*pos); } static void memtype_seq_stop(struct seq_file *seq, void *v) { + kfree(v); } static int memtype_seq_show(struct seq_file *seq, void *v) @@ -1181,8 +1183,6 @@ static int memtype_seq_show(struct seq_file *seq, void *v) entry_print->end, cattr_name(entry_print->type)); - kfree(entry_print); - return 0; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 796506dcfc42..023ac12f54a2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1735,7 +1735,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, * add rsp, 8 // skip eth_type_trans's frame * ret // return to its caller */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call) @@ -1774,6 +1774,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, save_regs(m, &prog, nr_args, stack_size); + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* arg1: mov rdi, im */ + emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); + if (emit_call(&prog, __bpf_tramp_enter, prog)) { + ret = -EINVAL; + goto cleanup; + } + } + if (fentry->nr_progs) if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; @@ -1792,8 +1801,7 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry->nr_progs || fmod_ret->nr_progs) - restore_regs(m, &prog, nr_args, stack_size); + restore_regs(m, &prog, nr_args, stack_size); /* call original function */ if (emit_call(&prog, orig_call, prog)) { @@ -1802,6 +1810,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + im->ip_after_call = prog; + memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; } if (fmod_ret->nr_progs) { @@ -1832,9 +1843,17 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, * the return value is only updated on the stack and still needs to be * restored to R0. */ - if (flags & BPF_TRAMP_F_CALL_ORIG) + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = prog; + /* arg1: mov rdi, im */ + emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); + if (emit_call(&prog, __bpf_tramp_exit, prog)) { + ret = -EINVAL; + goto cleanup; + } /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + } EMIT1(0x5B); /* pop rbx */ EMIT1(0xC9); /* leave */ diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index ce7188cbdae5..1c3a1962cade 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -867,9 +867,11 @@ static int do_reloc32(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; @@ -910,9 +912,11 @@ static int do_reloc_real(struct section *sec, Elf_Rel *rel, Elf_Sym *sym, case R_386_PC32: case R_386_PC16: case R_386_PC8: + case R_386_PLT32: /* - * NONE can be ignored and PC relative relocations don't - * need to be adjusted. + * NONE can be ignored and PC relative relocations don't need + * to be adjusted. Because sym must be defined, R_386_PLT32 can + * be treated the same way as R_386_PC32. */ break; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 3301875dd196..8bdba2564cf9 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -98,8 +98,8 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); unsigned long xen_max_p2m_pfn __read_mostly; EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT -#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT +#ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT +#define P2M_LIMIT CONFIG_XEN_MEMORY_HOTPLUG_LIMIT #else #define P2M_LIMIT 0 #endif @@ -652,10 +652,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn) pte_t *ptep; unsigned int level; - if (unlikely(pfn >= xen_p2m_size)) { - BUG_ON(mfn != INVALID_P2M_ENTRY); - return true; - } + /* Only invalid entries allowed above the highest p2m covered frame. */ + if (unlikely(pfn >= xen_p2m_size)) + return mfn == INVALID_P2M_ENTRY; /* * The interface requires atomic updates on p2m elements. @@ -710,9 +709,12 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, for (i = 0; i < count; i++) { unsigned long mfn, pfn; + struct gnttab_unmap_grant_ref unmap[2]; + int rc; /* Do not add to override if the map failed. */ - if (map_ops[i].status) + if (map_ops[i].status != GNTST_okay || + (kmap_ops && kmap_ops[i].status != GNTST_okay)) continue; if (map_ops[i].flags & GNTMAP_contains_pte) { @@ -726,10 +728,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned"); - if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { - ret = -ENOMEM; - goto out; + if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) + continue; + + /* + * Signal an error for this slot. This in turn requires + * immediate unmapping. + */ + map_ops[i].status = GNTST_general_error; + unmap[0].host_addr = map_ops[i].host_addr, + unmap[0].handle = map_ops[i].handle; + map_ops[i].handle = ~0; + if (map_ops[i].flags & GNTMAP_device_map) + unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr; + else + unmap[0].dev_bus_addr = 0; + + if (kmap_ops) { + kmap_ops[i].status = GNTST_general_error; + unmap[1].host_addr = kmap_ops[i].host_addr, + unmap[1].handle = kmap_ops[i].handle; + kmap_ops[i].handle = ~0; + if (kmap_ops[i].flags & GNTMAP_device_map) + unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr; + else + unmap[1].dev_bus_addr = 0; } + + /* + * Pre-populate both status fields, to be recognizable in + * the log message below. + */ + unmap[0].status = 1; + unmap[1].status = 1; + + rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, + unmap, 1 + !!kmap_ops); + if (rc || unmap[0].status != GNTST_okay || + unmap[1].status != GNTST_okay) + pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n", + rc, unmap[0].status, unmap[1].status); } out: @@ -750,17 +788,15 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, unsigned long mfn = __pfn_to_mfn(page_to_pfn(pages[i])); unsigned long pfn = page_to_pfn(pages[i]); - if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { + if (mfn != INVALID_P2M_ENTRY && (mfn & FOREIGN_FRAME_BIT)) + set_phys_to_machine(pfn, INVALID_P2M_ENTRY); + else ret = -EINVAL; - goto out; - } - - set_phys_to_machine(pfn, INVALID_P2M_ENTRY); } if (kunmap_ops) ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, - kunmap_ops, count); -out: + kunmap_ops, count) ?: ret; + return ret; } EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 7eab14d56369..8bfc10330107 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -59,13 +59,13 @@ static struct { } xen_remap_buf __initdata __aligned(PAGE_SIZE); static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; -/* +/* * The maximum amount of extra memory compared to the base size. The * main scaling factor is the size of struct page. At extreme ratios * of base:extra, all the base memory can be filled with page * structures for the extra memory, leaving no space for anything * else. - * + * * 10x seems like a reasonable balance between scaling flexibility and * leaving a practically usable system. */ @@ -791,17 +791,10 @@ char * __init xen_memory_setup(void) /* * Clamp the amount of extra memory to a EXTRA_MEM_RATIO - * factor the base size. On non-highmem systems, the base - * size is the full initial memory allocation; on highmem it - * is limited to the max size of lowmem, so that it doesn't - * get completely filled. + * factor the base size. * * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. - * - * In principle there could be a problem in lowmem systems if - * the initial memory is also very large with respect to - * lowmem, but we won't try to deal with that here. */ extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages, max_pages - max_pfn); diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index b791e2041e49..e2f14508f2d6 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -547,6 +547,8 @@ static void bfq_pd_init(struct blkg_policy_data *pd) entity->orig_weight = entity->weight = entity->new_weight = d->weight; entity->my_sched_data = &bfqg->sched_data; + entity->last_bfqq_created = NULL; + bfqg->my_entity = entity; /* * the root_group's will be set to NULL * in bfq_init_queue() diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 9e81d1052091..3599d020bf24 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c @@ -158,10 +158,9 @@ BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); -BFQ_BFQQ_FNS(has_waker); #undef BFQ_BFQQ_FNS \ -/* Expiration time of sync (0) and async (1) requests, in ns. */ +/* Expiration time of async (0) and sync (1) requests, in ns. */ static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 }; /* Maximum backwards seek (magic number lifted from CFQ), in KiB. */ @@ -1011,7 +1010,7 @@ static void bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, struct bfq_io_cq *bic, bool bfq_already_existing) { - unsigned int old_wr_coeff = bfqq->wr_coeff; + unsigned int old_wr_coeff = 1; bool busy = bfq_already_existing && bfq_bfqq_busy(bfqq); if (bic->saved_has_short_ttime) @@ -1024,9 +1023,22 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, else bfq_clear_bfqq_IO_bound(bfqq); + bfqq->last_serv_time_ns = bic->saved_last_serv_time_ns; + bfqq->inject_limit = bic->saved_inject_limit; + bfqq->decrease_time_jif = bic->saved_decrease_time_jif; + bfqq->entity.new_weight = bic->saved_weight; bfqq->ttime = bic->saved_ttime; - bfqq->wr_coeff = bic->saved_wr_coeff; + bfqq->io_start_time = bic->saved_io_start_time; + bfqq->tot_idle_time = bic->saved_tot_idle_time; + /* + * Restore weight coefficient only if low_latency is on + */ + if (bfqd->low_latency) { + old_wr_coeff = bfqq->wr_coeff; + bfqq->wr_coeff = bic->saved_wr_coeff; + } + bfqq->service_from_wr = bic->saved_service_from_wr; bfqq->wr_start_at_switch_to_srt = bic->saved_wr_start_at_switch_to_srt; bfqq->last_wr_start_finish = bic->saved_last_wr_start_finish; bfqq->wr_cur_max_time = bic->saved_wr_cur_max_time; @@ -1061,7 +1073,7 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd, static int bfqq_process_refs(struct bfq_queue *bfqq) { return bfqq->ref - bfqq->allocated - bfqq->entity.on_st_or_in_serv - - (bfqq->weight_counter != NULL); + (bfqq->weight_counter != NULL) - bfqq->stable_ref; } /* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */ @@ -1647,6 +1659,8 @@ static bool bfq_bfqq_higher_class_or_weight(struct bfq_queue *bfqq, return bfqq_weight > in_serv_weight; } +static bool bfq_better_to_idle(struct bfq_queue *bfqq); + static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, struct bfq_queue *bfqq, int old_wr_coeff, @@ -1671,15 +1685,19 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, * - it is sync, * - it does not belong to a large burst, * - it has been idle for enough time or is soft real-time, - * - is linked to a bfq_io_cq (it is not shared in any sense). + * - is linked to a bfq_io_cq (it is not shared in any sense), + * - has a default weight (otherwise we assume the user wanted + * to control its weight explicitly) */ in_burst = bfq_bfqq_in_large_burst(bfqq); soft_rt = bfqd->bfq_wr_max_softrt_rate > 0 && !BFQQ_TOTALLY_SEEKY(bfqq) && !in_burst && time_is_before_jiffies(bfqq->soft_rt_next_start) && - bfqq->dispatched == 0; - *interactive = !in_burst && idle_for_long_time; + bfqq->dispatched == 0 && + bfqq->entity.new_weight == 40; + *interactive = !in_burst && idle_for_long_time && + bfqq->entity.new_weight == 40; wr_or_deserves_wr = bfqd->low_latency && (bfqq->wr_coeff > 1 || (bfq_bfqq_sync(bfqq) && @@ -1717,17 +1735,6 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfq_clear_bfqq_just_created(bfqq); - - if (!bfq_bfqq_IO_bound(bfqq)) { - if (arrived_in_time) { - bfqq->requests_within_timer++; - if (bfqq->requests_within_timer >= - bfqd->bfq_requests_within_timer) - bfq_mark_bfqq_IO_bound(bfqq); - } else - bfqq->requests_within_timer = 0; - } - if (bfqd->low_latency) { if (unlikely(time_is_after_jiffies(bfqq->split_time))) /* wraparound */ @@ -1755,10 +1762,10 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, bfq_add_bfqq_busy(bfqd, bfqq); /* - * Expire in-service queue only if preemption may be needed - * for guarantees. In particular, we care only about two - * cases. The first is that bfqq has to recover a service - * hole, as explained in the comments on + * Expire in-service queue if preemption may be needed for + * guarantees or throughput. As for guarantees, we care + * explicitly about two cases. The first is that bfqq has to + * recover a service hole, as explained in the comments on * bfq_bfqq_update_budg_for_activation(), i.e., that * bfqq_wants_to_preempt is true. However, if bfqq does not * carry time-critical I/O, then bfqq's bandwidth is less @@ -1785,11 +1792,23 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd, * timestamps of the in-service queue would need to be * updated, and this operation is quite costly (see the * comments on bfq_bfqq_update_budg_for_activation()). + * + * As for throughput, we ask bfq_better_to_idle() whether we + * still need to plug I/O dispatching. If bfq_better_to_idle() + * says no, then plugging is not needed any longer, either to + * boost throughput or to perserve service guarantees. Then + * the best option is to stop plugging I/O, as not doing so + * would certainly lower throughput. We may end up in this + * case if: (1) upon a dispatch attempt, we detected that it + * was better to plug I/O dispatch, and to wait for a new + * request to arrive for the currently in-service queue, but + * (2) this switch of bfqq to busy changes the scenario. */ if (bfqd->in_service_queue && ((bfqq_wants_to_preempt && bfqq->wr_coeff >= bfqd->in_service_queue->wr_coeff) || - bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue)) && + bfq_bfqq_higher_class_or_weight(bfqq, bfqd->in_service_queue) || + !bfq_better_to_idle(bfqd->in_service_queue)) && next_queue_may_preempt(bfqd)) bfq_bfqq_expire(bfqd, bfqd->in_service_queue, false, BFQQE_PREEMPTED); @@ -1861,6 +1880,138 @@ static void bfq_reset_inject_limit(struct bfq_data *bfqd, bfqq->decrease_time_jif = jiffies; } +static void bfq_update_io_intensity(struct bfq_queue *bfqq, u64 now_ns) +{ + u64 tot_io_time = now_ns - bfqq->io_start_time; + + if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfqq->dispatched == 0) + bfqq->tot_idle_time += + now_ns - bfqq->ttime.last_end_request; + + if (unlikely(bfq_bfqq_just_created(bfqq))) + return; + + /* + * Must be busy for at least about 80% of the time to be + * considered I/O bound. + */ + if (bfqq->tot_idle_time * 5 > tot_io_time) + bfq_clear_bfqq_IO_bound(bfqq); + else + bfq_mark_bfqq_IO_bound(bfqq); + + /* + * Keep an observation window of at most 200 ms in the past + * from now. + */ + if (tot_io_time > 200 * NSEC_PER_MSEC) { + bfqq->io_start_time = now_ns - (tot_io_time>>1); + bfqq->tot_idle_time >>= 1; + } +} + +/* + * Detect whether bfqq's I/O seems synchronized with that of some + * other queue, i.e., whether bfqq, after remaining empty, happens to + * receive new I/O only right after some I/O request of the other + * queue has been completed. We call waker queue the other queue, and + * we assume, for simplicity, that bfqq may have at most one waker + * queue. + * + * A remarkable throughput boost can be reached by unconditionally + * injecting the I/O of the waker queue, every time a new + * bfq_dispatch_request happens to be invoked while I/O is being + * plugged for bfqq. In addition to boosting throughput, this + * unblocks bfqq's I/O, thereby improving bandwidth and latency for + * bfqq. Note that these same results may be achieved with the general + * injection mechanism, but less effectively. For details on this + * aspect, see the comments on the choice of the queue for injection + * in bfq_select_queue(). + * + * Turning back to the detection of a waker queue, a queue Q is deemed + * as a waker queue for bfqq if, for three consecutive times, bfqq + * happens to become non empty right after a request of Q has been + * completed. In particular, on the first time, Q is tentatively set + * as a candidate waker queue, while on the third consecutive time + * that Q is detected, the field waker_bfqq is set to Q, to confirm + * that Q is a waker queue for bfqq. These detection steps are + * performed only if bfqq has a long think time, so as to make it more + * likely that bfqq's I/O is actually being blocked by a + * synchronization. This last filter, plus the above three-times + * requirement, make false positives less likely. + * + * NOTE + * + * The sooner a waker queue is detected, the sooner throughput can be + * boosted by injecting I/O from the waker queue. Fortunately, + * detection is likely to be actually fast, for the following + * reasons. While blocked by synchronization, bfqq has a long think + * time. This implies that bfqq's inject limit is at least equal to 1 + * (see the comments in bfq_update_inject_limit()). So, thanks to + * injection, the waker queue is likely to be served during the very + * first I/O-plugging time interval for bfqq. This triggers the first + * step of the detection mechanism. Thanks again to injection, the + * candidate waker queue is then likely to be confirmed no later than + * during the next I/O-plugging interval for bfqq. + * + * ISSUE + * + * On queue merging all waker information is lost. + */ +static void bfq_check_waker(struct bfq_data *bfqd, struct bfq_queue *bfqq, + u64 now_ns) +{ + if (!bfqd->last_completed_rq_bfqq || + bfqd->last_completed_rq_bfqq == bfqq || + bfq_bfqq_has_short_ttime(bfqq) || + now_ns - bfqd->last_completion >= 4 * NSEC_PER_MSEC || + bfqd->last_completed_rq_bfqq == bfqq->waker_bfqq) + return; + + if (bfqd->last_completed_rq_bfqq != + bfqq->tentative_waker_bfqq) { + /* + * First synchronization detected with a + * candidate waker queue, or with a different + * candidate waker queue from the current one. + */ + bfqq->tentative_waker_bfqq = + bfqd->last_completed_rq_bfqq; + bfqq->num_waker_detections = 1; + } else /* Same tentative waker queue detected again */ + bfqq->num_waker_detections++; + + if (bfqq->num_waker_detections == 3) { + bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq; + bfqq->tentative_waker_bfqq = NULL; + + /* + * If the waker queue disappears, then + * bfqq->waker_bfqq must be reset. To + * this goal, we maintain in each + * waker queue a list, woken_list, of + * all the queues that reference the + * waker queue through their + * waker_bfqq pointer. When the waker + * queue exits, the waker_bfqq pointer + * of all the queues in the woken_list + * is reset. + * + * In addition, if bfqq is already in + * the woken_list of a waker queue, + * then, before being inserted into + * the woken_list of a new waker + * queue, bfqq must be removed from + * the woken_list of the old waker + * queue. + */ + if (!hlist_unhashed(&bfqq->woken_list_node)) + hlist_del_init(&bfqq->woken_list_node); + hlist_add_head(&bfqq->woken_list_node, + &bfqd->last_completed_rq_bfqq->woken_list); + } +} + static void bfq_add_request(struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq); @@ -1868,117 +2019,14 @@ static void bfq_add_request(struct request *rq) struct request *next_rq, *prev; unsigned int old_wr_coeff = bfqq->wr_coeff; bool interactive = false; + u64 now_ns = ktime_get_ns(); bfq_log_bfqq(bfqd, bfqq, "add_request %d", rq_is_sync(rq)); bfqq->queued[rq_is_sync(rq)]++; bfqd->queued++; if (RB_EMPTY_ROOT(&bfqq->sort_list) && bfq_bfqq_sync(bfqq)) { - /* - * Detect whether bfqq's I/O seems synchronized with - * that of some other queue, i.e., whether bfqq, after - * remaining empty, happens to receive new I/O only - * right after some I/O request of the other queue has - * been completed. We call waker queue the other - * queue, and we assume, for simplicity, that bfqq may - * have at most one waker queue. - * - * A remarkable throughput boost can be reached by - * unconditionally injecting the I/O of the waker - * queue, every time a new bfq_dispatch_request - * happens to be invoked while I/O is being plugged - * for bfqq. In addition to boosting throughput, this - * unblocks bfqq's I/O, thereby improving bandwidth - * and latency for bfqq. Note that these same results - * may be achieved with the general injection - * mechanism, but less effectively. For details on - * this aspect, see the comments on the choice of the - * queue for injection in bfq_select_queue(). - * - * Turning back to the detection of a waker queue, a - * queue Q is deemed as a waker queue for bfqq if, for - * two consecutive times, bfqq happens to become non - * empty right after a request of Q has been - * completed. In particular, on the first time, Q is - * tentatively set as a candidate waker queue, while - * on the second time, the flag - * bfq_bfqq_has_waker(bfqq) is set to confirm that Q - * is a waker queue for bfqq. These detection steps - * are performed only if bfqq has a long think time, - * so as to make it more likely that bfqq's I/O is - * actually being blocked by a synchronization. This - * last filter, plus the above two-times requirement, - * make false positives less likely. - * - * NOTE - * - * The sooner a waker queue is detected, the sooner - * throughput can be boosted by injecting I/O from the - * waker queue. Fortunately, detection is likely to be - * actually fast, for the following reasons. While - * blocked by synchronization, bfqq has a long think - * time. This implies that bfqq's inject limit is at - * least equal to 1 (see the comments in - * bfq_update_inject_limit()). So, thanks to - * injection, the waker queue is likely to be served - * during the very first I/O-plugging time interval - * for bfqq. This triggers the first step of the - * detection mechanism. Thanks again to injection, the - * candidate waker queue is then likely to be - * confirmed no later than during the next - * I/O-plugging interval for bfqq. - */ - if (bfqd->last_completed_rq_bfqq && - !bfq_bfqq_has_short_ttime(bfqq) && - ktime_get_ns() - bfqd->last_completion < - 200 * NSEC_PER_USEC) { - if (bfqd->last_completed_rq_bfqq != bfqq && - bfqd->last_completed_rq_bfqq != - bfqq->waker_bfqq) { - /* - * First synchronization detected with - * a candidate waker queue, or with a - * different candidate waker queue - * from the current one. - */ - bfqq->waker_bfqq = bfqd->last_completed_rq_bfqq; - - /* - * If the waker queue disappears, then - * bfqq->waker_bfqq must be reset. To - * this goal, we maintain in each - * waker queue a list, woken_list, of - * all the queues that reference the - * waker queue through their - * waker_bfqq pointer. When the waker - * queue exits, the waker_bfqq pointer - * of all the queues in the woken_list - * is reset. - * - * In addition, if bfqq is already in - * the woken_list of a waker queue, - * then, before being inserted into - * the woken_list of a new waker - * queue, bfqq must be removed from - * the woken_list of the old waker - * queue. - */ - if (!hlist_unhashed(&bfqq->woken_list_node)) - hlist_del_init(&bfqq->woken_list_node); - hlist_add_head(&bfqq->woken_list_node, - &bfqd->last_completed_rq_bfqq->woken_list); - - bfq_clear_bfqq_has_waker(bfqq); - } else if (bfqd->last_completed_rq_bfqq == - bfqq->waker_bfqq && - !bfq_bfqq_has_waker(bfqq)) { - /* - * synchronization with waker_bfqq - * seen for the second time - */ - bfq_mark_bfqq_has_waker(bfqq); - } - } + bfq_check_waker(bfqd, bfqq, now_ns); /* * Periodically reset inject limit, to make sure that @@ -2047,6 +2095,9 @@ static void bfq_add_request(struct request *rq) } } + if (bfq_bfqq_sync(bfqq)) + bfq_update_io_intensity(bfqq, now_ns); + elv_rb_add(&bfqq->sort_list, rq); /* @@ -2352,6 +2403,24 @@ static void bfq_requests_merged(struct request_queue *q, struct request *rq, /* Must be called with bfqq != NULL */ static void bfq_bfqq_end_wr(struct bfq_queue *bfqq) { + /* + * If bfqq has been enjoying interactive weight-raising, then + * reset soft_rt_next_start. We do it for the following + * reason. bfqq may have been conveying the I/O needed to load + * a soft real-time application. Such an application actually + * exhibits a soft real-time I/O pattern after it finishes + * loading, and finally starts doing its job. But, if bfqq has + * been receiving a lot of bandwidth so far (likely to happen + * on a fast device), then soft_rt_next_start now contains a + * high value that. So, without this reset, bfqq would be + * prevented from being possibly considered as soft_rt for a + * very long time. + */ + + if (bfqq->wr_cur_max_time != + bfqq->bfqd->bfq_wr_rt_max_time) + bfqq->soft_rt_next_start = jiffies; + if (bfq_bfqq_busy(bfqq)) bfqq->bfqd->wr_busy_queues--; bfqq->wr_coeff = 1; @@ -2557,6 +2626,11 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, return true; } +static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd, + struct bfq_queue *bfqq); + +static void bfq_put_stable_ref(struct bfq_queue *bfqq); + /* * Attempt to schedule a merge of bfqq with the currently in-service * queue or with a close queue among the scheduled queues. Return @@ -2579,10 +2653,49 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq, */ static struct bfq_queue * bfq_setup_cooperator(struct bfq_data *bfqd, struct bfq_queue *bfqq, - void *io_struct, bool request) + void *io_struct, bool request, struct bfq_io_cq *bic) { struct bfq_queue *in_service_bfqq, *new_bfqq; + /* + * Check delayed stable merge for rotational or non-queueing + * devs. For this branch to be executed, bfqq must not be + * currently merged with some other queue (i.e., bfqq->bic + * must be non null). If we considered also merged queues, + * then we should also check whether bfqq has already been + * merged with bic->stable_merge_bfqq. But this would be + * costly and complicated. + */ + if (unlikely(!bfqd->nonrot_with_queueing)) { + if (bic->stable_merge_bfqq && + !bfq_bfqq_just_created(bfqq) && + time_is_after_jiffies(bfqq->split_time + + msecs_to_jiffies(200))) { + struct bfq_queue *stable_merge_bfqq = + bic->stable_merge_bfqq; + int proc_ref = min(bfqq_process_refs(bfqq), + bfqq_process_refs(stable_merge_bfqq)); + + /* deschedule stable merge, because done or aborted here */ + bfq_put_stable_ref(stable_merge_bfqq); + + bic->stable_merge_bfqq = NULL; + + if (!idling_boosts_thr_without_issues(bfqd, bfqq) && + proc_ref > 0) { + /* next function will take at least one ref */ + struct bfq_queue *new_bfqq = + bfq_setup_merge(bfqq, stable_merge_bfqq); + + bic->stably_merged = true; + if (new_bfqq && new_bfqq->bic) + new_bfqq->bic->stably_merged = true; + return new_bfqq; + } else + return NULL; + } + } + /* * Do not perform queue merging if the device is non * rotational and performs internal queueing. In fact, such a @@ -2686,10 +2799,16 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) if (!bic) return; + bic->saved_last_serv_time_ns = bfqq->last_serv_time_ns; + bic->saved_inject_limit = bfqq->inject_limit; + bic->saved_decrease_time_jif = bfqq->decrease_time_jif; + bic->saved_weight = bfqq->entity.orig_weight; bic->saved_ttime = bfqq->ttime; bic->saved_has_short_ttime = bfq_bfqq_has_short_ttime(bfqq); bic->saved_IO_bound = bfq_bfqq_IO_bound(bfqq); + bic->saved_io_start_time = bfqq->io_start_time; + bic->saved_tot_idle_time = bfqq->tot_idle_time; bic->saved_in_large_burst = bfq_bfqq_in_large_burst(bfqq); bic->was_in_burst_list = !hlist_unhashed(&bfqq->burst_list_node); if (unlikely(bfq_bfqq_just_created(bfqq) && @@ -2712,11 +2831,23 @@ static void bfq_bfqq_save_state(struct bfq_queue *bfqq) bic->saved_wr_coeff = bfqq->wr_coeff; bic->saved_wr_start_at_switch_to_srt = bfqq->wr_start_at_switch_to_srt; + bic->saved_service_from_wr = bfqq->service_from_wr; bic->saved_last_wr_start_finish = bfqq->last_wr_start_finish; bic->saved_wr_cur_max_time = bfqq->wr_cur_max_time; } } + +static void +bfq_reassign_last_bfqq(struct bfq_queue *cur_bfqq, struct bfq_queue *new_bfqq) +{ + if (cur_bfqq->entity.parent && + cur_bfqq->entity.parent->last_bfqq_created == cur_bfqq) + cur_bfqq->entity.parent->last_bfqq_created = new_bfqq; + else if (cur_bfqq->bfqd && cur_bfqq->bfqd->last_bfqq_created == cur_bfqq) + cur_bfqq->bfqd->last_bfqq_created = new_bfqq; +} + void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) { /* @@ -2734,6 +2865,8 @@ void bfq_release_process_ref(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq != bfqd->in_service_queue) bfq_del_bfqq_busy(bfqd, bfqq, false); + bfq_reassign_last_bfqq(bfqq, NULL); + bfq_put_queue(bfqq); } @@ -2750,6 +2883,29 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, bfq_mark_bfqq_IO_bound(new_bfqq); bfq_clear_bfqq_IO_bound(bfqq); + /* + * The processes associated with bfqq are cooperators of the + * processes associated with new_bfqq. So, if bfqq has a + * waker, then assume that all these processes will be happy + * to let bfqq's waker freely inject I/O when they have no + * I/O. + */ + if (bfqq->waker_bfqq && !new_bfqq->waker_bfqq && + bfqq->waker_bfqq != new_bfqq) { + new_bfqq->waker_bfqq = bfqq->waker_bfqq; + new_bfqq->tentative_waker_bfqq = NULL; + + /* + * If the waker queue disappears, then + * new_bfqq->waker_bfqq must be reset. So insert + * new_bfqq into the woken_list of the waker. See + * bfq_check_waker for details. + */ + hlist_add_head(&new_bfqq->woken_list_node, + &new_bfqq->waker_bfqq->woken_list); + + } + /* * If bfqq is weight-raised, then let new_bfqq inherit * weight-raising. To reduce false positives, neglect the case @@ -2807,6 +2963,9 @@ bfq_merge_bfqqs(struct bfq_data *bfqd, struct bfq_io_cq *bic, */ new_bfqq->pid = -1; bfqq->bic = NULL; + + bfq_reassign_last_bfqq(bfqq, new_bfqq); + bfq_release_process_ref(bfqd, bfqq); } @@ -2834,7 +2993,7 @@ static bool bfq_allow_bio_merge(struct request_queue *q, struct request *rq, * We take advantage of this function to perform an early merge * of the queues of possible cooperating processes. */ - new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false); + new_bfqq = bfq_setup_cooperator(bfqd, bfqq, bio, false, bfqd->bio_bic); if (new_bfqq) { /* * bic still points to bfqq, then it has not yet been @@ -2937,6 +3096,7 @@ static void __bfq_set_in_service_queue(struct bfq_data *bfqd, } bfqd->in_service_queue = bfqq; + bfqd->in_serv_last_pos = 0; } /* @@ -3442,20 +3602,38 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq) * order until all the requests already queued in the device have been * served. The last sub-condition commented above somewhat mitigates * this problem for weight-raised queues. + * + * However, as an additional mitigation for this problem, we preserve + * plugging for a special symmetric case that may suddenly turn into + * asymmetric: the case where only bfqq is busy. In this case, not + * expiring bfqq does not cause any harm to any other queues in terms + * of service guarantees. In contrast, it avoids the following unlucky + * sequence of events: (1) bfqq is expired, (2) a new queue with a + * lower weight than bfqq becomes busy (or more queues), (3) the new + * queue is served until a new request arrives for bfqq, (4) when bfqq + * is finally served, there are so many requests of the new queue in + * the drive that the pending requests for bfqq take a lot of time to + * be served. In particular, event (2) may case even already + * dispatched requests of bfqq to be delayed, inside the drive. So, to + * avoid this series of events, the scenario is preventively declared + * as asymmetric also if bfqq is the only busy queues */ static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd, struct bfq_queue *bfqq) { + int tot_busy_queues = bfq_tot_busy_queues(bfqd); + /* No point in idling for bfqq if it won't get requests any longer */ if (unlikely(!bfqq_process_refs(bfqq))) return false; return (bfqq->wr_coeff > 1 && (bfqd->wr_busy_queues < - bfq_tot_busy_queues(bfqd) || + tot_busy_queues || bfqd->rq_in_driver >= bfqq->dispatched + 4)) || - bfq_asymmetric_scenario(bfqd, bfqq); + bfq_asymmetric_scenario(bfqd, bfqq) || + tot_busy_queues == 1; } static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq, @@ -3939,10 +4117,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, bfq_bfqq_budget_left(bfqq) >= entity->budget / 3))) bfq_bfqq_charge_time(bfqd, bfqq, delta); - if (reason == BFQQE_TOO_IDLE && - entity->service <= 2 * entity->budget / 10) - bfq_clear_bfqq_IO_bound(bfqq); - if (bfqd->low_latency && bfqq->wr_coeff == 1) bfqq->last_wr_start_finish = jiffies; @@ -3952,30 +4126,15 @@ void bfq_bfqq_expire(struct bfq_data *bfqd, * If we get here, and there are no outstanding * requests, then the request pattern is isochronous * (see the comments on the function - * bfq_bfqq_softrt_next_start()). Thus we can compute - * soft_rt_next_start. And we do it, unless bfqq is in - * interactive weight raising. We do not do it in the - * latter subcase, for the following reason. bfqq may - * be conveying the I/O needed to load a soft - * real-time application. Such an application will - * actually exhibit a soft real-time I/O pattern after - * it finally starts doing its job. But, if - * soft_rt_next_start is computed here for an - * interactive bfqq, and bfqq had received a lot of - * service before remaining with no outstanding - * request (likely to happen on a fast device), then - * soft_rt_next_start would be assigned such a high - * value that, for a very long time, bfqq would be - * prevented from being possibly considered as soft - * real time. + * bfq_bfqq_softrt_next_start()). Therefore we can + * compute soft_rt_next_start. * * If, instead, the queue still has outstanding * requests, then we have to wait for the completion * of all the outstanding requests to discover whether * the request pattern is actually isochronous. */ - if (bfqq->dispatched == 0 && - bfqq->wr_coeff != bfqd->bfq_wr_coeff) + if (bfqq->dispatched == 0) bfqq->soft_rt_next_start = bfq_bfqq_softrt_next_start(bfqd, bfqq); else if (bfqq->dispatched > 0) { @@ -4419,9 +4578,15 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) bfq_bfqq_busy(bfqq->bic->bfqq[0]) && bfqq->bic->bfqq[0]->next_rq ? bfqq->bic->bfqq[0] : NULL; + struct bfq_queue *blocked_bfqq = + !hlist_empty(&bfqq->woken_list) ? + container_of(bfqq->woken_list.first, + struct bfq_queue, + woken_list_node) + : NULL; /* - * The next three mutually-exclusive ifs decide + * The next four mutually-exclusive ifs decide * whether to try injection, and choose the queue to * pick an I/O request from. * @@ -4454,7 +4619,15 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * next bfqq's I/O is brought forward dramatically, * for it is not blocked for milliseconds. * - * The third if checks whether bfqq is a queue for + * The third if checks whether there is a queue woken + * by bfqq, and currently with pending I/O. Such a + * woken queue does not steal bandwidth from bfqq, + * because it remains soon without I/O if bfqq is not + * served. So there is virtually no risk of loss of + * bandwidth for bfqq if this woken queue has I/O + * dispatched while bfqq is waiting for new I/O. + * + * The fourth if checks whether bfqq is a queue for * which it is better to avoid injection. It is so if * bfqq delivers more throughput when served without * any further I/O from other queues in the middle, or @@ -4474,11 +4647,11 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * bfq_update_has_short_ttime(), it is rather likely * that, if I/O is being plugged for bfqq and the * waker queue has pending I/O requests that are - * blocking bfqq's I/O, then the third alternative + * blocking bfqq's I/O, then the fourth alternative * above lets the waker queue get served before the * I/O-plugging timeout fires. So one may deem the * second alternative superfluous. It is not, because - * the third alternative may be way less effective in + * the fourth alternative may be way less effective in * case of a synchronization. For two main * reasons. First, throughput may be low because the * inject limit may be too low to guarantee the same @@ -4487,7 +4660,7 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) * guarantees (the second alternative unconditionally * injects a pending I/O request of the waker queue * for each bfq_dispatch_request()). Second, with the - * third alternative, the duration of the plugging, + * fourth alternative, the duration of the plugging, * i.e., the time before bfqq finally receives new I/O, * may not be minimized, because the waker queue may * happen to be served only after other queues. @@ -4497,14 +4670,22 @@ static struct bfq_queue *bfq_select_queue(struct bfq_data *bfqd) bfq_serv_to_charge(async_bfqq->next_rq, async_bfqq) <= bfq_bfqq_budget_left(async_bfqq)) bfqq = bfqq->bic->bfqq[0]; - else if (bfq_bfqq_has_waker(bfqq) && + else if (bfqq->waker_bfqq && bfq_bfqq_busy(bfqq->waker_bfqq) && - bfqq->next_rq && + bfqq->waker_bfqq->next_rq && bfq_serv_to_charge(bfqq->waker_bfqq->next_rq, bfqq->waker_bfqq) <= bfq_bfqq_budget_left(bfqq->waker_bfqq) ) bfqq = bfqq->waker_bfqq; + else if (blocked_bfqq && + bfq_bfqq_busy(blocked_bfqq) && + blocked_bfqq->next_rq && + bfq_serv_to_charge(blocked_bfqq->next_rq, + blocked_bfqq) <= + bfq_bfqq_budget_left(blocked_bfqq) + ) + bfqq = blocked_bfqq; else if (!idling_boosts_thr_without_issues(bfqd, bfqq) && (bfqq->wr_coeff == 1 || bfqd->wr_busy_queues > 1 || !bfq_bfqq_has_short_ttime(bfqq))) @@ -4559,9 +4740,21 @@ static void bfq_update_wr_data(struct bfq_data *bfqd, struct bfq_queue *bfqq) bfqq->wr_cur_max_time)) { if (bfqq->wr_cur_max_time != bfqd->bfq_wr_rt_max_time || time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt + - bfq_wr_duration(bfqd))) + bfq_wr_duration(bfqd))) { + /* + * Either in interactive weight + * raising, or in soft_rt weight + * raising with the + * interactive-weight-raising period + * elapsed (so no switch back to + * interactive weight raising). + */ bfq_bfqq_end_wr(bfqq); - else { + } else { /* + * soft_rt finishing while still in + * interactive period, switch back to + * interactive weight raising + */ switch_back_to_interactive_wr(bfqq, bfqd); bfqq->entity.prio_changed = 1; } @@ -4640,9 +4833,6 @@ static bool bfq_has_work(struct blk_mq_hw_ctx *hctx) { struct bfq_data *bfqd = hctx->queue->elevator->elevator_data; - if (!atomic_read(&hctx->elevator_queued)) - return false; - /* * Avoiding lock: a race on bfqd->busy_queues should cause at * most a call to dispatch for nothing @@ -4892,7 +5082,6 @@ void bfq_put_queue(struct bfq_queue *bfqq) hlist_for_each_entry_safe(item, n, &bfqq->woken_list, woken_list_node) { item->waker_bfqq = NULL; - bfq_clear_bfqq_has_waker(item); hlist_del_init(&item->woken_list_node); } @@ -4903,6 +5092,12 @@ void bfq_put_queue(struct bfq_queue *bfqq) bfqg_and_blkg_put(bfqg); } +static void bfq_put_stable_ref(struct bfq_queue *bfqq) +{ + bfqq->stable_ref--; + bfq_put_queue(bfqq); +} + static void bfq_put_cooperator(struct bfq_queue *bfqq) { struct bfq_queue *__bfqq, *next; @@ -4947,7 +5142,14 @@ static void bfq_exit_icq_bfqq(struct bfq_io_cq *bic, bool is_sync) if (bfqq && bfqd) { unsigned long flags; - spin_lock_irqsave(&bfqd->lock, flags); + /* bfq_exit_icq is usually called with ioc->lock held, which is + * inverse order from elsewhere, which may grab ioc->lock + * under bfqd->lock if we merge requests and drop the last ioc + * refcount. Since exit_icq is either called with a refcount, + * or with queue quiesced, use a differnet lock class to + * silence lockdep + */ + spin_lock_irqsave_nested(&bfqd->lock, flags, 1); bfqq->bic = NULL; bfq_exit_bfqq(bfqd, bfqq); bic_set_bfqq(bic, NULL, is_sync); @@ -4959,6 +5161,24 @@ static void bfq_exit_icq(struct io_cq *icq) { struct bfq_io_cq *bic = icq_to_bic(icq); + if (bic->stable_merge_bfqq) { + struct bfq_data *bfqd = bic->stable_merge_bfqq->bfqd; + + /* + * bfqd is NULL if scheduler already exited, and in + * that case this is the last time bfqq is accessed. + */ + if (bfqd) { + unsigned long flags; + + spin_lock_irqsave(&bfqd->lock, flags); + bfq_put_stable_ref(bic->stable_merge_bfqq); + spin_unlock_irqrestore(&bfqd->lock, flags); + } else { + bfq_put_stable_ref(bic->stable_merge_bfqq); + } + } + bfq_exit_icq_bfqq(bic, true); bfq_exit_icq_bfqq(bic, false); } @@ -5012,12 +5232,15 @@ bfq_set_next_ioprio_data(struct bfq_queue *bfqq, struct bfq_io_cq *bic) } bfqq->entity.new_weight = bfq_ioprio_to_weight(bfqq->new_ioprio); + bfq_log_bfqq(bfqd, bfqq, "new_ioprio %d new_weight %d", + bfqq->new_ioprio, bfqq->entity.new_weight); bfqq->entity.prio_changed = 1; } static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bio *bio, bool is_sync, - struct bfq_io_cq *bic); + struct bfq_io_cq *bic, + bool respawn); static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) { @@ -5037,7 +5260,7 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) bfqq = bic_to_bfqq(bic, false); if (bfqq) { bfq_release_process_ref(bfqd, bfqq); - bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic); + bfqq = bfq_get_queue(bfqd, bio, BLK_RW_ASYNC, bic, true); bic_set_bfqq(bic, bfqq, false); } @@ -5049,6 +5272,8 @@ static void bfq_check_ioprio_change(struct bfq_io_cq *bic, struct bio *bio) static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, struct bfq_io_cq *bic, pid_t pid, int is_sync) { + u64 now_ns = ktime_get_ns(); + RB_CLEAR_NODE(&bfqq->entity.rb_node); INIT_LIST_HEAD(&bfqq->fifo); INIT_HLIST_NODE(&bfqq->burst_list_node); @@ -5076,7 +5301,11 @@ static void bfq_init_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq, bfq_clear_bfqq_sync(bfqq); /* set end request to minus infinity from now */ - bfqq->ttime.last_end_request = ktime_get_ns() + 1; + bfqq->ttime.last_end_request = now_ns + 1; + + bfqq->creation_time = jiffies; + + bfqq->io_start_time = now_ns; bfq_mark_bfqq_IO_bound(bfqq); @@ -5125,9 +5354,156 @@ static struct bfq_queue **bfq_async_queue_prio(struct bfq_data *bfqd, } } +static struct bfq_queue * +bfq_do_early_stable_merge(struct bfq_data *bfqd, struct bfq_queue *bfqq, + struct bfq_io_cq *bic, + struct bfq_queue *last_bfqq_created) +{ + struct bfq_queue *new_bfqq = + bfq_setup_merge(bfqq, last_bfqq_created); + + if (!new_bfqq) + return bfqq; + + if (new_bfqq->bic) + new_bfqq->bic->stably_merged = true; + bic->stably_merged = true; + + /* + * Reusing merge functions. This implies that + * bfqq->bic must be set too, for + * bfq_merge_bfqqs to correctly save bfqq's + * state before killing it. + */ + bfqq->bic = bic; + bfq_merge_bfqqs(bfqd, bic, bfqq, new_bfqq); + + return new_bfqq; +} + +/* + * Many throughput-sensitive workloads are made of several parallel + * I/O flows, with all flows generated by the same application, or + * more generically by the same task (e.g., system boot). The most + * counterproductive action with these workloads is plugging I/O + * dispatch when one of the bfq_queues associated with these flows + * remains temporarily empty. + * + * To avoid this plugging, BFQ has been using a burst-handling + * mechanism for years now. This mechanism has proven effective for + * throughput, and not detrimental for service guarantees. The + * following function pushes this mechanism a little bit further, + * basing on the following two facts. + * + * First, all the I/O flows of a the same application or task + * contribute to the execution/completion of that common application + * or task. So the performance figures that matter are total + * throughput of the flows and task-wide I/O latency. In particular, + * these flows do not need to be protected from each other, in terms + * of individual bandwidth or latency. + * + * Second, the above fact holds regardless of the number of flows. + * + * Putting these two facts together, this commits merges stably the + * bfq_queues associated with these I/O flows, i.e., with the + * processes that generate these IO/ flows, regardless of how many the + * involved processes are. + * + * To decide whether a set of bfq_queues is actually associated with + * the I/O flows of a common application or task, and to merge these + * queues stably, this function operates as follows: given a bfq_queue, + * say Q2, currently being created, and the last bfq_queue, say Q1, + * created before Q2, Q2 is merged stably with Q1 if + * - very little time has elapsed since when Q1 was created + * - Q2 has the same ioprio as Q1 + * - Q2 belongs to the same group as Q1 + * + * Merging bfq_queues also reduces scheduling overhead. A fio test + * with ten random readers on /dev/nullb shows a throughput boost of + * 40%, with a quadcore. Since BFQ's execution time amounts to ~50% of + * the total per-request processing time, the above throughput boost + * implies that BFQ's overhead is reduced by more than 50%. + * + * This new mechanism most certainly obsoletes the current + * burst-handling heuristics. We keep those heuristics for the moment. + */ +static struct bfq_queue *bfq_do_or_sched_stable_merge(struct bfq_data *bfqd, + struct bfq_queue *bfqq, + struct bfq_io_cq *bic) +{ + struct bfq_queue **source_bfqq = bfqq->entity.parent ? + &bfqq->entity.parent->last_bfqq_created : + &bfqd->last_bfqq_created; + + struct bfq_queue *last_bfqq_created = *source_bfqq; + + /* + * If last_bfqq_created has not been set yet, then init it. If + * it has been set already, but too long ago, then move it + * forward to bfqq. Finally, move also if bfqq belongs to a + * different group than last_bfqq_created, or if bfqq has a + * different ioprio or ioprio_class. If none of these + * conditions holds true, then try an early stable merge or + * schedule a delayed stable merge. + * + * A delayed merge is scheduled (instead of performing an + * early merge), in case bfqq might soon prove to be more + * throughput-beneficial if not merged. Currently this is + * possible only if bfqd is rotational with no queueing. For + * such a drive, not merging bfqq is better for throughput if + * bfqq happens to contain sequential I/O. So, we wait a + * little bit for enough I/O to flow through bfqq. After that, + * if such an I/O is sequential, then the merge is + * canceled. Otherwise the merge is finally performed. + */ + if (!last_bfqq_created || + time_before(last_bfqq_created->creation_time + + bfqd->bfq_burst_interval, + bfqq->creation_time) || + bfqq->entity.parent != last_bfqq_created->entity.parent || + bfqq->ioprio != last_bfqq_created->ioprio || + bfqq->ioprio_class != last_bfqq_created->ioprio_class) + *source_bfqq = bfqq; + else if (time_after_eq(last_bfqq_created->creation_time + + bfqd->bfq_burst_interval, + bfqq->creation_time)) { + if (likely(bfqd->nonrot_with_queueing)) + /* + * With this type of drive, leaving + * bfqq alone may provide no + * throughput benefits compared with + * merging bfqq. So merge bfqq now. + */ + bfqq = bfq_do_early_stable_merge(bfqd, bfqq, + bic, + last_bfqq_created); + else { /* schedule tentative stable merge */ + /* + * get reference on last_bfqq_created, + * to prevent it from being freed, + * until we decide whether to merge + */ + last_bfqq_created->ref++; + /* + * need to keep track of stable refs, to + * compute process refs correctly + */ + last_bfqq_created->stable_ref++; + /* + * Record the bfqq to merge to. + */ + bic->stable_merge_bfqq = last_bfqq_created; + } + } + + return bfqq; +} + + static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, struct bio *bio, bool is_sync, - struct bfq_io_cq *bic) + struct bfq_io_cq *bic, + bool respawn) { const int ioprio = IOPRIO_PRIO_DATA(bic->ioprio); const int ioprio_class = IOPRIO_PRIO_CLASS(bic->ioprio); @@ -5185,7 +5561,10 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, out: bfqq->ref++; /* get a process reference to this queue */ - bfq_log_bfqq(bfqd, bfqq, "get_queue, at end: %p, %d", bfqq, bfqq->ref); + + if (bfqq != &bfqd->oom_bfqq && is_sync && !respawn) + bfqq = bfq_do_or_sched_stable_merge(bfqd, bfqq, bic); + rcu_read_unlock(); return bfqq; } @@ -5194,11 +5573,19 @@ static void bfq_update_io_thinktime(struct bfq_data *bfqd, struct bfq_queue *bfqq) { struct bfq_ttime *ttime = &bfqq->ttime; - u64 elapsed = ktime_get_ns() - bfqq->ttime.last_end_request; + u64 elapsed; + /* + * We are really interested in how long it takes for the queue to + * become busy when there is no outstanding IO for this queue. So + * ignore cases when the bfq queue has already IO queued. + */ + if (bfqq->dispatched || bfq_bfqq_busy(bfqq)) + return; + elapsed = ktime_get_ns() - bfqq->ttime.last_end_request; elapsed = min_t(u64, elapsed, 2ULL * bfqd->bfq_slice_idle); - ttime->ttime_samples = (7*bfqq->ttime.ttime_samples + 256) / 8; + ttime->ttime_samples = (7*ttime->ttime_samples + 256) / 8; ttime->ttime_total = div_u64(7*ttime->ttime_total + 256*elapsed, 8); ttime->ttime_mean = div64_ul(ttime->ttime_total + 128, ttime->ttime_samples); @@ -5213,8 +5600,26 @@ bfq_update_io_seektime(struct bfq_data *bfqd, struct bfq_queue *bfqq, if (bfqq->wr_coeff > 1 && bfqq->wr_cur_max_time == bfqd->bfq_wr_rt_max_time && - BFQQ_TOTALLY_SEEKY(bfqq)) - bfq_bfqq_end_wr(bfqq); + BFQQ_TOTALLY_SEEKY(bfqq)) { + if (time_is_before_jiffies(bfqq->wr_start_at_switch_to_srt + + bfq_wr_duration(bfqd))) { + /* + * In soft_rt weight raising with the + * interactive-weight-raising period + * elapsed (so no switch back to + * interactive weight raising). + */ + bfq_bfqq_end_wr(bfqq); + } else { /* + * stopping soft_rt weight raising + * while still in interactive period, + * switch back to interactive weight + * raising + */ + switch_back_to_interactive_wr(bfqq, bfqd); + bfqq->entity.prio_changed = 1; + } + } } static void bfq_update_has_short_ttime(struct bfq_data *bfqd, @@ -5238,12 +5643,13 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd, return; /* Think time is infinite if no process is linked to - * bfqq. Otherwise check average think time to - * decide whether to mark as has_short_ttime + * bfqq. Otherwise check average think time to decide whether + * to mark as has_short_ttime. To this goal, compare average + * think time with half the I/O-plugging timeout. */ if (atomic_read(&bic->icq.ioc->active_ref) == 0 || (bfq_sample_valid(bfqq->ttime.ttime_samples) && - bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)) + bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle>>1)) has_short_ttime = false; state_changed = has_short_ttime != bfq_bfqq_has_short_ttime(bfqq); @@ -5408,7 +5814,8 @@ static void bfq_rq_enqueued(struct bfq_data *bfqd, struct bfq_queue *bfqq, static bool __bfq_insert_request(struct bfq_data *bfqd, struct request *rq) { struct bfq_queue *bfqq = RQ_BFQQ(rq), - *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true); + *new_bfqq = bfq_setup_cooperator(bfqd, bfqq, rq, true, + RQ_BIC(rq)); bool waiting, idle_timer_disabled = false; if (new_bfqq) { @@ -5514,7 +5921,49 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq, spin_lock_irq(&bfqd->lock); bfqq = bfq_init_rq(rq); - if (!bfqq || at_head || blk_rq_is_passthrough(rq)) { + + /* + * Reqs with at_head or passthrough flags set are to be put + * directly into dispatch list. Additional case for putting rq + * directly into the dispatch queue: the only active + * bfq_queues are bfqq and either its waker bfq_queue or one + * of its woken bfq_queues. The rationale behind this + * additional condition is as follows: + * - consider a bfq_queue, say Q1, detected as a waker of + * another bfq_queue, say Q2 + * - by definition of a waker, Q1 blocks the I/O of Q2, i.e., + * some I/O of Q1 needs to be completed for new I/O of Q2 + * to arrive. A notable example of waker is journald + * - so, Q1 and Q2 are in any respect the queues of two + * cooperating processes (or of two cooperating sets of + * processes): the goal of Q1's I/O is doing what needs to + * be done so that new Q2's I/O can finally be + * issued. Therefore, if the service of Q1's I/O is delayed, + * then Q2's I/O is delayed too. Conversely, if Q2's I/O is + * delayed, the goal of Q1's I/O is hindered. + * - as a consequence, if some I/O of Q1/Q2 arrives while + * Q2/Q1 is the only queue in service, there is absolutely + * no point in delaying the service of such an I/O. The + * only possible result is a throughput loss + * - so, when the above condition holds, the best option is to + * have the new I/O dispatched as soon as possible + * - the most effective and efficient way to attain the above + * goal is to put the new I/O directly in the dispatch + * list + * - as an additional restriction, Q1 and Q2 must be the only + * busy queues for this commit to put the I/O of Q2/Q1 in + * the dispatch list. This is necessary, because, if also + * other queues are waiting for service, then putting new + * I/O directly in the dispatch list may evidently cause a + * violation of service guarantees for the other queues + */ + if (!bfqq || + (bfqq != bfqd->in_service_queue && + bfqd->in_service_queue != NULL && + bfq_tot_busy_queues(bfqd) == 1 + bfq_bfqq_busy(bfqq) && + (bfqq->waker_bfqq == bfqd->in_service_queue || + bfqd->in_service_queue->waker_bfqq == bfqq)) || + at_head || blk_rq_is_passthrough(rq)) { if (at_head) list_add(&rq->queuelist, &bfqd->dispatch); else @@ -5557,7 +6006,6 @@ static void bfq_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); bfq_insert_request(hctx, rq, at_head); - atomic_inc(&hctx->elevator_queued); } } @@ -5655,7 +6103,17 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd) 1UL<<(BFQ_RATE_SHIFT - 10)) bfq_update_rate_reset(bfqd, NULL); bfqd->last_completion = now_ns; - bfqd->last_completed_rq_bfqq = bfqq; + /* + * Shared queues are likely to receive I/O at a high + * rate. This may deceptively let them be considered as wakers + * of other queues. But a false waker will unjustly steal + * bandwidth to its supposedly woken queue. So considering + * also shared queues in the waking mechanism may cause more + * control troubles than throughput benefits. Then do not set + * last_completed_rq_bfqq to bfqq if bfqq is a shared queue. + */ + if (!bfq_bfqq_coop(bfqq)) + bfqd->last_completed_rq_bfqq = bfqq; /* * If we are waiting to discover whether the request pattern @@ -5925,7 +6383,6 @@ static void bfq_finish_requeue_request(struct request *rq) bfq_completed_request(bfqq, bfqd); bfq_finish_requeue_request_body(bfqq); - atomic_dec(&rq->mq_hctx->elevator_queued); spin_unlock_irqrestore(&bfqd->lock, flags); } else { @@ -6013,7 +6470,7 @@ static struct bfq_queue *bfq_get_bfqq_handle_split(struct bfq_data *bfqd, if (bfqq) bfq_put_queue(bfqq); - bfqq = bfq_get_queue(bfqd, bio, is_sync, bic); + bfqq = bfq_get_queue(bfqd, bio, is_sync, bic, split); bic_set_bfqq(bic, bfqq, is_sync); if (split && is_sync) { @@ -6134,8 +6591,9 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) if (likely(!new_queue)) { /* If the queue was seeky for too long, break it apart. */ - if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq)) { - bfq_log_bfqq(bfqd, bfqq, "breaking apart bfqq"); + if (bfq_bfqq_coop(bfqq) && bfq_bfqq_split_coop(bfqq) && + !bic->stably_merged) { + struct bfq_queue *old_bfqq = bfqq; /* Update bic before losing reference to bfqq */ if (bfq_bfqq_in_large_burst(bfqq)) @@ -6144,11 +6602,24 @@ static struct bfq_queue *bfq_init_rq(struct request *rq) bfqq = bfq_split_bfqq(bic, bfqq); split = true; - if (!bfqq) + if (!bfqq) { bfqq = bfq_get_bfqq_handle_split(bfqd, bic, bio, true, is_sync, NULL); - else + bfqq->waker_bfqq = old_bfqq->waker_bfqq; + bfqq->tentative_waker_bfqq = NULL; + + /* + * If the waker queue disappears, then + * new_bfqq->waker_bfqq must be + * reset. So insert new_bfqq into the + * woken_list of the waker. See + * bfq_check_waker for details. + */ + if (bfqq->waker_bfqq) + hlist_add_head(&bfqq->woken_list_node, + &bfqq->waker_bfqq->woken_list); + } else bfqq_already_existing = true; } } @@ -6489,8 +6960,6 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e) bfqd->bfq_slice_idle = bfq_slice_idle; bfqd->bfq_timeout = bfq_timeout; - bfqd->bfq_requests_within_timer = 120; - bfqd->bfq_large_burst_thresh = 8; bfqd->bfq_burst_interval = msecs_to_jiffies(180); diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h index 703895224562..99c2a3cb081e 100644 --- a/block/bfq-iosched.h +++ b/block/bfq-iosched.h @@ -197,6 +197,9 @@ struct bfq_entity { /* flag, set if the entity is counted in groups_with_pending_reqs */ bool in_groups_with_pending_reqs; + + /* last child queue of entity created (for non-leaf entities) */ + struct bfq_queue *last_bfqq_created; }; struct bfq_group; @@ -230,6 +233,8 @@ struct bfq_ttime { struct bfq_queue { /* reference counter */ int ref; + /* counter of references from other queues for delayed stable merge */ + int stable_ref; /* parent bfq_data */ struct bfq_data *bfqd; @@ -291,6 +296,11 @@ struct bfq_queue { /* associated @bfq_ttime struct */ struct bfq_ttime ttime; + /* when bfqq started to do I/O within the last observation window */ + u64 io_start_time; + /* how long bfqq has remained empty during the last observ. window */ + u64 tot_idle_time; + /* bit vector: a 1 for each seeky requests in history */ u32 seek_history; @@ -360,6 +370,8 @@ struct bfq_queue { unsigned long first_IO_time; /* time of first I/O for this queue */ + unsigned long creation_time; /* when this queue is created */ + /* max service rate measured so far */ u32 max_service_rate; @@ -371,6 +383,11 @@ struct bfq_queue { * bfq_select_queue(). */ struct bfq_queue *waker_bfqq; + /* pointer to the curr. tentative waker queue, see bfq_check_waker() */ + struct bfq_queue *tentative_waker_bfqq; + /* number of times the same tentative waker has been detected */ + unsigned int num_waker_detections; + /* node for woken_list, see below */ struct hlist_node woken_list_node; /* @@ -407,6 +424,9 @@ struct bfq_io_cq { */ bool saved_IO_bound; + u64 saved_io_start_time; + u64 saved_tot_idle_time; + /* * Same purpose as the previous fields for the value of the * field keeping the queue's belonging to a large burst @@ -432,9 +452,20 @@ struct bfq_io_cq { */ unsigned long saved_wr_coeff; unsigned long saved_last_wr_start_finish; + unsigned long saved_service_from_wr; unsigned long saved_wr_start_at_switch_to_srt; unsigned int saved_wr_cur_max_time; struct bfq_ttime saved_ttime; + + /* Save also injection state */ + u64 saved_last_serv_time_ns; + unsigned int saved_inject_limit; + unsigned long saved_decrease_time_jif; + + /* candidate queue for a stable merge (due to close creation time) */ + struct bfq_queue *stable_merge_bfqq; + + bool stably_merged; /* non splittable if true */ }; /** @@ -559,6 +590,9 @@ struct bfq_data { /* bfqq owning the last completed rq */ struct bfq_queue *last_completed_rq_bfqq; + /* last bfqq created, among those in the root group */ + struct bfq_queue *last_bfqq_created; + /* time of last transition from empty to non-empty (ns) */ u64 last_empty_occupied_ns; @@ -641,14 +675,6 @@ struct bfq_data { */ unsigned int bfq_timeout; - /* - * Number of consecutive requests that must be issued within - * the idle time slice to set again idling to a queue which - * was marked as non-I/O-bound (see the definition of the - * IO_bound flag for further details). - */ - unsigned int bfq_requests_within_timer; - /* * Force device idling whenever needed to provide accurate * service guarantees, without caring about throughput @@ -770,7 +796,6 @@ enum bfqq_state_flags { */ BFQQF_coop, /* bfqq is shared */ BFQQF_split_coop, /* shared bfqq will be split */ - BFQQF_has_waker /* bfqq has a waker queue */ }; #define BFQ_BFQQ_FNS(name) \ @@ -790,7 +815,6 @@ BFQ_BFQQ_FNS(in_large_burst); BFQ_BFQQ_FNS(coop); BFQ_BFQQ_FNS(split_coop); BFQ_BFQQ_FNS(softrt_update); -BFQ_BFQQ_FNS(has_waker); #undef BFQ_BFQQ_FNS /* Expiration reasons. */ diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c index 26776bdbdf36..37945b775540 100644 --- a/block/bfq-wf2q.c +++ b/block/bfq-wf2q.c @@ -875,7 +875,7 @@ void bfq_bfqq_charge_time(struct bfq_data *bfqd, struct bfq_queue *bfqq, unsigned long time_ms) { struct bfq_entity *entity = &bfqq->entity; - unsigned long timeout_ms = jiffies_to_msecs(bfq_timeout); + unsigned long timeout_ms = jiffies_to_msecs(bfqd->bfq_timeout); unsigned long bounded_time_ms = min(time_ms, timeout_ms); int serv_to_charge_for_time = (bfqd->bfq_max_budget * bounded_time_ms) / timeout_ms; @@ -1709,4 +1709,12 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq) if (bfqq->wr_coeff > 1) bfqd->wr_busy_queues++; + + /* Move bfqq to the head of the woken list of its waker */ + if (!hlist_unhashed(&bfqq->woken_list_node) && + &bfqq->woken_list_node != bfqq->waker_bfqq->woken_list.first) { + hlist_del_init(&bfqq->woken_list_node); + hlist_add_head(&bfqq->woken_list_node, + &bfqq->waker_bfqq->woken_list); + } } diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c index 85d5790ac49b..3304e841df7c 100644 --- a/block/blk-cgroup-rwstat.c +++ b/block/blk-cgroup-rwstat.c @@ -109,6 +109,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, lockdep_assert_held(&blkg->q->queue_lock); + memset(sum, 0, sizeof(*sum)); rcu_read_lock(); blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) { struct blkg_rwstat *rwstat; @@ -122,7 +123,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, rwstat = (void *)pos_blkg + off; for (i = 0; i < BLKG_RWSTAT_NR; i++) - sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i); + sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i); } rcu_read_unlock(); } diff --git a/block/blk-merge.c b/block/blk-merge.c index 808768f6b174..756473295f19 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -383,6 +383,14 @@ unsigned int blk_recalc_rq_segments(struct request *rq) switch (bio_op(rq->bio)) { case REQ_OP_DISCARD: case REQ_OP_SECURE_ERASE: + if (queue_max_discard_segments(rq->q) > 1) { + struct bio *bio = rq->bio; + + for_each_bio(bio) + nr_phys_segs++; + return nr_phys_segs; + } + return 1; case REQ_OP_WRITE_ZEROES: return 0; case REQ_OP_WRITE_SAME: diff --git a/block/blk-mq.c b/block/blk-mq.c index f285a9123a8b..580601787f7a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1646,6 +1646,42 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) } EXPORT_SYMBOL(blk_mq_run_hw_queue); +/* + * Is the request queue handled by an IO scheduler that does not respect + * hardware queues when dispatching? + */ +static bool blk_mq_has_sqsched(struct request_queue *q) +{ + struct elevator_queue *e = q->elevator; + + if (e && e->type->ops.dispatch_request && + !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE)) + return true; + return false; +} + +/* + * Return prefered queue to dispatch from (if any) for non-mq aware IO + * scheduler. + */ +static struct blk_mq_hw_ctx *blk_mq_get_sq_hctx(struct request_queue *q) +{ + struct blk_mq_hw_ctx *hctx; + + /* + * If the IO scheduler does not respect hardware queues when + * dispatching, we just don't bother with multiple HW queues and + * dispatch from hctx for the current CPU since running multiple queues + * just causes lock contention inside the scheduler and pointless cache + * bouncing. + */ + hctx = blk_mq_map_queue_type(q, HCTX_TYPE_DEFAULT, + raw_smp_processor_id()); + if (!blk_mq_hctx_stopped(hctx)) + return hctx; + return NULL; +} + /** * blk_mq_run_hw_queues - Run all hardware queues in a request queue. * @q: Pointer to the request queue to run. @@ -1653,14 +1689,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queue); */ void blk_mq_run_hw_queues(struct request_queue *q, bool async) { - struct blk_mq_hw_ctx *hctx; + struct blk_mq_hw_ctx *hctx, *sq_hctx; int i; + sq_hctx = NULL; + if (blk_mq_has_sqsched(q)) + sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) continue; - - blk_mq_run_hw_queue(hctx, async); + /* + * Dispatch from this hctx either if there's no hctx preferred + * by IO scheduler or if it has requests that bypass the + * scheduler. + */ + if (!sq_hctx || sq_hctx == hctx || + !list_empty_careful(&hctx->dispatch)) + blk_mq_run_hw_queue(hctx, async); } } EXPORT_SYMBOL(blk_mq_run_hw_queues); @@ -1672,14 +1717,23 @@ EXPORT_SYMBOL(blk_mq_run_hw_queues); */ void blk_mq_delay_run_hw_queues(struct request_queue *q, unsigned long msecs) { - struct blk_mq_hw_ctx *hctx; + struct blk_mq_hw_ctx *hctx, *sq_hctx; int i; + sq_hctx = NULL; + if (blk_mq_has_sqsched(q)) + sq_hctx = blk_mq_get_sq_hctx(q); queue_for_each_hw_ctx(q, hctx, i) { if (blk_mq_hctx_stopped(hctx)) continue; - - blk_mq_delay_run_hw_queue(hctx, msecs); + /* + * Dispatch from this hctx either if there's no hctx preferred + * by IO scheduler or if it has requests that bypass the + * scheduler. + */ + if (!sq_hctx || sq_hctx == hctx || + !list_empty_careful(&hctx->dispatch)) + blk_mq_delay_run_hw_queue(hctx, msecs); } } EXPORT_SYMBOL(blk_mq_delay_run_hw_queues); @@ -2653,7 +2707,6 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, goto free_hctx; atomic_set(&hctx->nr_active, 0); - atomic_set(&hctx->elevator_queued, 0); if (node == NUMA_NO_NODE) node = set->numa_node; hctx->numa_node = node; diff --git a/block/blk-settings.c b/block/blk-settings.c index 43990b1d148b..89447d32d9ea 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -481,6 +481,14 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt) } EXPORT_SYMBOL(blk_queue_io_opt); +static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs) +{ + sectors = round_down(sectors, lbs >> SECTOR_SHIFT); + if (sectors < PAGE_SIZE >> SECTOR_SHIFT) + sectors = PAGE_SIZE >> SECTOR_SHIFT; + return sectors; +} + /** * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) @@ -607,6 +615,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, ret = -1; } + t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size); + t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size); + t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size); + /* Discard alignment and granularity */ if (b->discard_granularity) { alignment = queue_limit_discard_alignment(b, start); diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 7a68b6e4300c..fc925f73d694 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -240,7 +240,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, */ if (op == REQ_OP_ZONE_RESET && blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { - bio->bi_opf = REQ_OP_ZONE_RESET_ALL; + bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC; break; } @@ -318,6 +318,22 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, return 0; } +static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode, + const struct blk_zone_range *zrange) +{ + loff_t start, end; + + if (zrange->sector + zrange->nr_sectors <= zrange->sector || + zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk)) + /* Out of range */ + return -EINVAL; + + start = zrange->sector << SECTOR_SHIFT; + end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1; + + return truncate_bdev_range(bdev, mode, start, end); +} + /* * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. @@ -329,6 +345,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, struct request_queue *q; struct blk_zone_range zrange; enum req_opf op; + int ret; if (!argp) return -EINVAL; @@ -352,6 +369,11 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, switch (cmd) { case BLKRESETZONE: op = REQ_OP_ZONE_RESET; + + /* Invalidate the page cache, including dirty pages. */ + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); + if (ret) + return ret; break; case BLKOPENZONE: op = REQ_OP_ZONE_OPEN; @@ -366,8 +388,20 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, return -ENOTTY; } - return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); + + /* + * Invalidate the page cache again for zone reset: writes can only be + * direct for zoned devices so concurrent writes would not add any page + * to the page cache after/during reset. The page cache may be filled + * again due to concurrent reads though and dropping the pages for + * these is fine. + */ + if (!ret && cmd == BLKRESETZONE) + ret = blkdev_truncate_zone_range(bdev, mode, &zrange); + + return ret; } static inline unsigned long *blk_alloc_zone_bitmap(int node, diff --git a/block/bsg.c b/block/bsg.c index d7bae94b64d9..3d78e843a83f 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -157,8 +157,10 @@ static int bsg_sg_io(struct request_queue *q, fmode_t mode, void __user *uarg) return PTR_ERR(rq); ret = q->bsg_dev.ops->fill_hdr(rq, &hdr, mode); - if (ret) + if (ret) { + blk_put_request(rq); return ret; + } rq->timeout = msecs_to_jiffies(hdr.timeout); if (!rq->timeout) diff --git a/block/genhd.c b/block/genhd.c index 9e741a4f351b..12940cfa68af 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -74,7 +74,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size) return false; pr_info("%s: detected capacity change from %lld to %lld\n", - disk->disk_name, size, capacity); + disk->disk_name, capacity, size); /* * Historically we did not send a uevent for changes to/from an empty @@ -658,10 +658,8 @@ static void register_disk(struct device *parent, struct gendisk *disk, kobject_create_and_add("holders", &ddev->kobj); disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); - if (disk->flags & GENHD_FL_HIDDEN) { - dev_set_uevent_suppress(ddev, 0); + if (disk->flags & GENHD_FL_HIDDEN) return; - } disk_scan_partitions(disk); diff --git a/block/ioctl.c b/block/ioctl.c index d61d652078f4..ff241e663c01 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -81,20 +81,27 @@ static int compat_blkpg_ioctl(struct block_device *bdev, } #endif -static int blkdev_reread_part(struct block_device *bdev) +static int blkdev_reread_part(struct block_device *bdev, fmode_t mode) { - int ret; + struct block_device *tmp; if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev)) return -EINVAL; if (!capable(CAP_SYS_ADMIN)) return -EACCES; - mutex_lock(&bdev->bd_mutex); - ret = bdev_disk_changed(bdev, false); - mutex_unlock(&bdev->bd_mutex); + /* + * Reopen the device to revalidate the driver state and force a + * partition rescan. + */ + mode &= ~FMODE_EXCL; + set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); - return ret; + tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL); + if (IS_ERR(tmp)) + return PTR_ERR(tmp); + blkdev_put(tmp, mode); + return 0; } static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, @@ -498,7 +505,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode, bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE; return 0; case BLKRRPART: - return blkdev_reread_part(bdev); + return blkdev_reread_part(bdev, mode); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index dc89199bc8c6..c25c41d0d061 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -1029,6 +1029,7 @@ static struct elevator_type kyber_sched = { #endif .elevator_attrs = kyber_sched_attrs, .elevator_name = "kyber", + .elevator_features = ELEVATOR_F_MQ_AWARE, .elevator_owner = THIS_MODULE, }; diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 800ac902809b..b57470e154c8 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -386,8 +386,6 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx) spin_lock(&dd->lock); rq = __dd_dispatch_request(dd); spin_unlock(&dd->lock); - if (rq) - atomic_dec(&rq->mq_hctx->elevator_queued); return rq; } @@ -535,7 +533,6 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx, rq = list_first_entry(list, struct request, queuelist); list_del_init(&rq->queuelist); dd_insert_request(hctx, rq, at_head); - atomic_inc(&hctx->elevator_queued); } spin_unlock(&dd->lock); } @@ -582,9 +579,6 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx) { struct deadline_data *dd = hctx->queue->elevator->elevator_data; - if (!atomic_read(&hctx->elevator_queued)) - return false; - return !list_empty_careful(&dd->dispatch) || !list_empty_careful(&dd->fifo_list[0]) || !list_empty_careful(&dd->fifo_list[1]); diff --git a/certs/blacklist.c b/certs/blacklist.c index 6514f9ebc943..f1c434b04b5e 100644 --- a/certs/blacklist.c +++ b/certs/blacklist.c @@ -162,7 +162,7 @@ static int __init blacklist_init(void) KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH, KEY_ALLOC_NOT_IN_QUOTA | - KEY_FLAG_KEEP, + KEY_ALLOC_SET_KEEP, NULL, NULL); if (IS_ERR(blacklist_keyring)) panic("Can't allocate system blacklist keyring\n"); diff --git a/crypto/Kconfig b/crypto/Kconfig index a367fcfeb5d4..3913e409ba88 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -772,7 +772,7 @@ config CRYPTO_POLY1305_X86_64 config CRYPTO_POLY1305_MIPS tristate "Poly1305 authenticator algorithm (MIPS optimized)" - depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + depends on MIPS select CRYPTO_ARCH_HAVE_LIB_POLY1305 config CRYPTO_MD4 diff --git a/crypto/ecdh_helper.c b/crypto/ecdh_helper.c index 66fcb2ea8154..fca63b559f65 100644 --- a/crypto/ecdh_helper.c +++ b/crypto/ecdh_helper.c @@ -67,6 +67,9 @@ int crypto_ecdh_decode_key(const char *buf, unsigned int len, if (secret.type != CRYPTO_KPP_SECRET_TYPE_ECDH) return -EINVAL; + if (unlikely(len < secret.len)) + return -EINVAL; + ptr = ecdh_unpack_data(¶ms->curve_id, ptr, sizeof(params->curve_id)); ptr = ecdh_unpack_data(¶ms->key_size, ptr, sizeof(params->key_size)); if (secret.len != crypto_ecdh_key_len(params)) diff --git a/crypto/michael_mic.c b/crypto/michael_mic.c index 63350c4ad461..f4c31049601c 100644 --- a/crypto/michael_mic.c +++ b/crypto/michael_mic.c @@ -7,7 +7,7 @@ * Copyright (c) 2004 Jouni Malinen */ #include -#include +#include #include #include #include @@ -19,7 +19,7 @@ struct michael_mic_ctx { }; struct michael_mic_desc_ctx { - u8 pending[4]; + __le32 pending; size_t pending_len; u32 l, r; @@ -60,13 +60,12 @@ static int michael_update(struct shash_desc *desc, const u8 *data, unsigned int len) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - const __le32 *src; if (mctx->pending_len) { int flen = 4 - mctx->pending_len; if (flen > len) flen = len; - memcpy(&mctx->pending[mctx->pending_len], data, flen); + memcpy((u8 *)&mctx->pending + mctx->pending_len, data, flen); mctx->pending_len += flen; data += flen; len -= flen; @@ -74,23 +73,21 @@ static int michael_update(struct shash_desc *desc, const u8 *data, if (mctx->pending_len < 4) return 0; - src = (const __le32 *)mctx->pending; - mctx->l ^= le32_to_cpup(src); + mctx->l ^= le32_to_cpu(mctx->pending); michael_block(mctx->l, mctx->r); mctx->pending_len = 0; } - src = (const __le32 *)data; - while (len >= 4) { - mctx->l ^= le32_to_cpup(src++); + mctx->l ^= get_unaligned_le32(data); michael_block(mctx->l, mctx->r); + data += 4; len -= 4; } if (len > 0) { mctx->pending_len = len; - memcpy(mctx->pending, src, len); + memcpy(&mctx->pending, data, len); } return 0; @@ -100,8 +97,7 @@ static int michael_update(struct shash_desc *desc, const u8 *data, static int michael_final(struct shash_desc *desc, u8 *out) { struct michael_mic_desc_ctx *mctx = shash_desc_ctx(desc); - u8 *data = mctx->pending; - __le32 *dst = (__le32 *)out; + u8 *data = (u8 *)&mctx->pending; /* Last block and padding (0x5a, 4..7 x 0) */ switch (mctx->pending_len) { @@ -123,8 +119,8 @@ static int michael_final(struct shash_desc *desc, u8 *out) /* l ^= 0; */ michael_block(mctx->l, mctx->r); - dst[0] = cpu_to_le32(mctx->l); - dst[1] = cpu_to_le32(mctx->r); + put_unaligned_le32(mctx->l, out); + put_unaligned_le32(mctx->r, out + 4); return 0; } @@ -135,13 +131,11 @@ static int michael_setkey(struct crypto_shash *tfm, const u8 *key, { struct michael_mic_ctx *mctx = crypto_shash_ctx(tfm); - const __le32 *data = (const __le32 *)key; - if (keylen != 8) return -EINVAL; - mctx->l = le32_to_cpu(data[0]); - mctx->r = le32_to_cpu(data[1]); + mctx->l = get_unaligned_le32(key); + mctx->r = get_unaligned_le32(key + 4); return 0; } @@ -156,7 +150,6 @@ static struct shash_alg alg = { .cra_name = "michael_mic", .cra_driver_name = "michael_mic-generic", .cra_blocksize = 8, - .cra_alignmask = 3, .cra_ctxsize = sizeof(struct michael_mic_ctx), .cra_module = THIS_MODULE, } diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a647bb298fbc..a4a11d2b57bd 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -199,8 +199,8 @@ static int test_mb_aead_jiffies(struct test_mb_aead_data *data, int enc, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -471,8 +471,8 @@ static int test_aead_jiffies(struct aead_request *req, int enc, return ret; } - printk("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount, secs, (u64)bcount * blen); return 0; } @@ -764,8 +764,8 @@ static int test_mb_ahash_jiffies(struct test_mb_ahash_data *data, int blen, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -1201,8 +1201,8 @@ static int test_mb_acipher_jiffies(struct test_mb_skcipher_data *data, int enc, goto out; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount * num_mb, secs, (long)bcount * blen * num_mb); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount * num_mb, secs, (u64)bcount * blen * num_mb); out: kfree(rc); @@ -1441,8 +1441,8 @@ static int test_acipher_jiffies(struct skcipher_request *req, int enc, return ret; } - pr_cont("%d operations in %d seconds (%ld bytes)\n", - bcount, secs, (long)bcount * blen); + pr_cont("%d operations in %d seconds (%llu bytes)\n", + bcount, secs, (u64)bcount * blen); return 0; } diff --git a/crypto/zstd.c b/crypto/zstd.c index 1a3309f066f7..154a969c83a8 100644 --- a/crypto/zstd.c +++ b/crypto/zstd.c @@ -18,22 +18,22 @@ #define ZSTD_DEF_LEVEL 3 struct zstd_ctx { - ZSTD_CCtx *cctx; - ZSTD_DCtx *dctx; + zstd_cctx *cctx; + zstd_dctx *dctx; void *cwksp; void *dwksp; }; -static ZSTD_parameters zstd_params(void) +static zstd_parameters zstd_params(void) { - return ZSTD_getParams(ZSTD_DEF_LEVEL, 0, 0); + return zstd_get_params(ZSTD_DEF_LEVEL, 0); } static int zstd_comp_init(struct zstd_ctx *ctx) { int ret = 0; - const ZSTD_parameters params = zstd_params(); - const size_t wksp_size = ZSTD_CCtxWorkspaceBound(params.cParams); + const zstd_parameters params = zstd_params(); + const size_t wksp_size = zstd_cctx_workspace_bound(¶ms.cParams); ctx->cwksp = vzalloc(wksp_size); if (!ctx->cwksp) { @@ -41,7 +41,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx) goto out; } - ctx->cctx = ZSTD_initCCtx(ctx->cwksp, wksp_size); + ctx->cctx = zstd_init_cctx(ctx->cwksp, wksp_size); if (!ctx->cctx) { ret = -EINVAL; goto out_free; @@ -56,7 +56,7 @@ static int zstd_comp_init(struct zstd_ctx *ctx) static int zstd_decomp_init(struct zstd_ctx *ctx) { int ret = 0; - const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); + const size_t wksp_size = zstd_dctx_workspace_bound(); ctx->dwksp = vzalloc(wksp_size); if (!ctx->dwksp) { @@ -64,7 +64,7 @@ static int zstd_decomp_init(struct zstd_ctx *ctx) goto out; } - ctx->dctx = ZSTD_initDCtx(ctx->dwksp, wksp_size); + ctx->dctx = zstd_init_dctx(ctx->dwksp, wksp_size); if (!ctx->dctx) { ret = -EINVAL; goto out_free; @@ -152,10 +152,10 @@ static int __zstd_compress(const u8 *src, unsigned int slen, { size_t out_len; struct zstd_ctx *zctx = ctx; - const ZSTD_parameters params = zstd_params(); + const zstd_parameters params = zstd_params(); - out_len = ZSTD_compressCCtx(zctx->cctx, dst, *dlen, src, slen, params); - if (ZSTD_isError(out_len)) + out_len = zstd_compress_cctx(zctx->cctx, dst, *dlen, src, slen, ¶ms); + if (zstd_is_error(out_len)) return -EINVAL; *dlen = out_len; return 0; @@ -182,8 +182,8 @@ static int __zstd_decompress(const u8 *src, unsigned int slen, size_t out_len; struct zstd_ctx *zctx = ctx; - out_len = ZSTD_decompressDCtx(zctx->dctx, dst, *dlen, src, slen); - if (ZSTD_isError(out_len)) + out_len = zstd_decompress_dctx(zctx->dctx, dst, *dlen, src, slen); + if (zstd_is_error(out_len)) return -EINVAL; *dlen = out_len; return 0; diff --git a/drivers/acpi/acpi_configfs.c b/drivers/acpi/acpi_configfs.c index cf91f49101ea..3a14859dbb75 100644 --- a/drivers/acpi/acpi_configfs.c +++ b/drivers/acpi/acpi_configfs.c @@ -268,7 +268,12 @@ static int __init acpi_configfs_init(void) acpi_table_group = configfs_register_default_group(root, "table", &acpi_tables_type); - return PTR_ERR_OR_ZERO(acpi_table_group); + if (IS_ERR(acpi_table_group)) { + configfs_unregister_subsystem(&acpi_configfs); + return PTR_ERR(acpi_table_group); + } + + return 0; } module_init(acpi_configfs_init); diff --git a/drivers/acpi/acpica/acobject.h b/drivers/acpi/acpica/acobject.h index 9f0219a8cb98..dd7efafcb103 100644 --- a/drivers/acpi/acpica/acobject.h +++ b/drivers/acpi/acpica/acobject.h @@ -284,6 +284,7 @@ struct acpi_object_addr_handler { acpi_adr_space_handler handler; struct acpi_namespace_node *node; /* Parent device */ void *context; + acpi_mutex context_mutex; acpi_adr_space_setup setup; union acpi_operand_object *region_list; /* Regions using this handler */ union acpi_operand_object *next; diff --git a/drivers/acpi/acpica/evhandler.c b/drivers/acpi/acpica/evhandler.c index 5884eba047f7..3438dc187efb 100644 --- a/drivers/acpi/acpica/evhandler.c +++ b/drivers/acpi/acpica/evhandler.c @@ -489,6 +489,13 @@ acpi_ev_install_space_handler(struct acpi_namespace_node *node, /* Init handler obj */ + status = + acpi_os_create_mutex(&handler_obj->address_space.context_mutex); + if (ACPI_FAILURE(status)) { + acpi_ut_remove_reference(handler_obj); + goto unlock_and_exit; + } + handler_obj->address_space.space_id = (u8)space_id; handler_obj->address_space.handler_flags = flags; handler_obj->address_space.region_list = NULL; diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index a8a4c8c9b9ef..7701ae67e091 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -112,6 +112,8 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, union acpi_operand_object *region_obj2; void *region_context = NULL; struct acpi_connection_info *context; + acpi_mutex context_mutex; + u8 context_locked; acpi_physical_address address; ACPI_FUNCTION_TRACE(ev_address_space_dispatch); @@ -136,6 +138,8 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, } context = handler_desc->address_space.context; + context_mutex = handler_desc->address_space.context_mutex; + context_locked = FALSE; /* * It may be the case that the region has never been initialized. @@ -204,6 +208,23 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, handler = handler_desc->address_space.handler; address = (region_obj->region.address + region_offset); + ACPI_DEBUG_PRINT((ACPI_DB_OPREGION, + "Handler %p (@%p) Address %8.8X%8.8X [%s]\n", + ®ion_obj->region.handler->address_space, handler, + ACPI_FORMAT_UINT64(address), + acpi_ut_get_region_name(region_obj->region. + space_id))); + + if (!(handler_desc->address_space.handler_flags & + ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) { + /* + * For handlers other than the default (supplied) handlers, we must + * exit the interpreter because the handler *might* block -- we don't + * know what it will do, so we can't hold the lock on the interpreter. + */ + acpi_ex_exit_interpreter(); + } + /* * Special handling for generic_serial_bus and general_purpose_io: * There are three extra parameters that must be passed to the @@ -212,6 +233,11 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, * 2) Length of the above buffer * 3) Actual access length from the access_as() op * + * Since we pass these extra parameters via the context, which is + * shared between threads, we must lock the context to avoid these + * parameters being changed from another thread before the handler + * has completed running. + * * In addition, for general_purpose_io, the Address and bit_width fields * are defined as follows: * 1) Address is the pin number index of the field (bit offset from @@ -221,6 +247,14 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, if ((region_obj->region.space_id == ACPI_ADR_SPACE_GSBUS) && context && field_obj) { + status = + acpi_os_acquire_mutex(context_mutex, ACPI_WAIT_FOREVER); + if (ACPI_FAILURE(status)) { + goto re_enter_interpreter; + } + + context_locked = TRUE; + /* Get the Connection (resource_template) buffer */ context->connection = field_obj->field.resource_buffer; @@ -230,6 +264,14 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, if ((region_obj->region.space_id == ACPI_ADR_SPACE_GPIO) && context && field_obj) { + status = + acpi_os_acquire_mutex(context_mutex, ACPI_WAIT_FOREVER); + if (ACPI_FAILURE(status)) { + goto re_enter_interpreter; + } + + context_locked = TRUE; + /* Get the Connection (resource_template) buffer */ context->connection = field_obj->field.resource_buffer; @@ -239,28 +281,15 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, bit_width = field_obj->field.bit_length; } - ACPI_DEBUG_PRINT((ACPI_DB_OPREGION, - "Handler %p (@%p) Address %8.8X%8.8X [%s]\n", - ®ion_obj->region.handler->address_space, handler, - ACPI_FORMAT_UINT64(address), - acpi_ut_get_region_name(region_obj->region. - space_id))); - - if (!(handler_desc->address_space.handler_flags & - ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) { - /* - * For handlers other than the default (supplied) handlers, we must - * exit the interpreter because the handler *might* block -- we don't - * know what it will do, so we can't hold the lock on the interpreter. - */ - acpi_ex_exit_interpreter(); - } - /* Call the handler */ status = handler(function, address, bit_width, value, context, region_obj2->extra.region_context); + if (context_locked) { + acpi_os_release_mutex(context_mutex); + } + if (ACPI_FAILURE(status)) { ACPI_EXCEPTION((AE_INFO, status, "Returned by Handler for [%s]", acpi_ut_get_region_name(region_obj->region. @@ -277,6 +306,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj, } } +re_enter_interpreter: if (!(handler_desc->address_space.handler_flags & ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) { /* diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index da97fd0c6b51..3bb06f17a18b 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -201,6 +201,8 @@ acpi_remove_address_space_handler(acpi_handle device, /* Now we can delete the handler object */ + acpi_os_release_mutex(handler_obj->address_space. + context_mutex); acpi_ut_remove_reference(handler_obj); goto unlock_and_exit; } diff --git a/drivers/acpi/acpica/nsaccess.c b/drivers/acpi/acpica/nsaccess.c index 3f045b5953b2..a0c1a665dfc1 100644 --- a/drivers/acpi/acpica/nsaccess.c +++ b/drivers/acpi/acpica/nsaccess.c @@ -99,13 +99,12 @@ acpi_status acpi_ns_root_initialize(void) * just create and link the new node(s) here. */ new_node = - ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_namespace_node)); + acpi_ns_create_node(*ACPI_CAST_PTR(u32, init_val->name)); if (!new_node) { status = AE_NO_MEMORY; goto unlock_and_exit; } - ACPI_COPY_NAMESEG(new_node->name.ascii, init_val->name); new_node->descriptor_type = ACPI_DESC_TYPE_NAMED; new_node->type = init_val->type; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index e6a5d997241c..cb8f70842249 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -9,6 +9,8 @@ #ifndef _ACPI_INTERNAL_H_ #define _ACPI_INTERNAL_H_ +#include + #define PREFIX "ACPI: " int early_acpi_osi_init(void); @@ -96,9 +98,11 @@ void acpi_scan_table_handler(u32 event, void *table, void *context); extern struct list_head acpi_bus_id_list; +#define ACPI_MAX_DEVICE_INSTANCES 4096 + struct acpi_device_bus_id { const char *bus_id; - unsigned int instance_no; + struct ida instance_ida; struct list_head node; }; diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c index 24e87b630573..16b28084c1ca 100644 --- a/drivers/acpi/property.c +++ b/drivers/acpi/property.c @@ -787,9 +787,6 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, const union acpi_object *obj; int ret; - if (!val) - return -EINVAL; - if (proptype >= DEV_PROP_U8 && proptype <= DEV_PROP_U64) { ret = acpi_data_get_property(data, propname, ACPI_TYPE_INTEGER, &obj); if (ret) @@ -799,28 +796,43 @@ static int acpi_data_prop_read_single(const struct acpi_device_data *data, case DEV_PROP_U8: if (obj->integer.value > U8_MAX) return -EOVERFLOW; - *(u8 *)val = obj->integer.value; + + if (val) + *(u8 *)val = obj->integer.value; + break; case DEV_PROP_U16: if (obj->integer.value > U16_MAX) return -EOVERFLOW; - *(u16 *)val = obj->integer.value; + + if (val) + *(u16 *)val = obj->integer.value; + break; case DEV_PROP_U32: if (obj->integer.value > U32_MAX) return -EOVERFLOW; - *(u32 *)val = obj->integer.value; + + if (val) + *(u32 *)val = obj->integer.value; + break; default: - *(u64 *)val = obj->integer.value; + if (val) + *(u64 *)val = obj->integer.value; + break; } + + if (!val) + return 1; } else if (proptype == DEV_PROP_STRING) { ret = acpi_data_get_property(data, propname, ACPI_TYPE_STRING, &obj); if (ret) return ret; - *(char **)val = obj->string.pointer; + if (val) + *(char **)val = obj->string.pointer; return 1; } else { @@ -834,7 +846,7 @@ int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, { int ret; - if (!adev) + if (!adev || !val) return -EINVAL; ret = acpi_data_prop_read_single(&adev->data, propname, proptype, val); @@ -928,10 +940,20 @@ static int acpi_data_prop_read(const struct acpi_device_data *data, const union acpi_object *items; int ret; - if (val && nval == 1) { + if (nval == 1 || !val) { ret = acpi_data_prop_read_single(data, propname, proptype, val); - if (ret >= 0) + /* + * The overflow error means that the property is there and it is + * single-value, but its type does not match, so return. + */ + if (ret >= 0 || ret == -EOVERFLOW) return ret; + + /* + * Reading this property as a single-value one failed, but its + * value may still be represented as one-element array, so + * continue. + */ } ret = acpi_data_get_property_array(data, propname, ACPI_TYPE_ANY, &obj); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 22566b4b3150..a4fdf61b0644 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -482,9 +482,8 @@ static void acpi_device_del(struct acpi_device *device) list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node) if (!strcmp(acpi_device_bus_id->bus_id, acpi_device_hid(device))) { - if (acpi_device_bus_id->instance_no > 0) - acpi_device_bus_id->instance_no--; - else { + ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no); + if (ida_is_empty(&acpi_device_bus_id->instance_ida)) { list_del(&acpi_device_bus_id->node); kfree_const(acpi_device_bus_id->bus_id); kfree(acpi_device_bus_id); @@ -623,12 +622,38 @@ void acpi_bus_put_acpi_device(struct acpi_device *adev) put_device(&adev->dev); } +static struct acpi_device_bus_id *acpi_device_bus_id_match(const char *dev_id) +{ + struct acpi_device_bus_id *acpi_device_bus_id; + + /* Find suitable bus_id and instance number in acpi_bus_id_list. */ + list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node) { + if (!strcmp(acpi_device_bus_id->bus_id, dev_id)) + return acpi_device_bus_id; + } + return NULL; +} + +static int acpi_device_set_name(struct acpi_device *device, + struct acpi_device_bus_id *acpi_device_bus_id) +{ + struct ida *instance_ida = &acpi_device_bus_id->instance_ida; + int result; + + result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL); + if (result < 0) + return result; + + device->pnp.instance_no = result; + dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, result); + return 0; +} + int acpi_device_add(struct acpi_device *device, void (*release)(struct device *)) { + struct acpi_device_bus_id *acpi_device_bus_id; int result; - struct acpi_device_bus_id *acpi_device_bus_id, *new_bus_id; - int found = 0; if (device->handle) { acpi_status status; @@ -654,41 +679,38 @@ int acpi_device_add(struct acpi_device *device, INIT_LIST_HEAD(&device->del_list); mutex_init(&device->physical_node_lock); - new_bus_id = kzalloc(sizeof(struct acpi_device_bus_id), GFP_KERNEL); - if (!new_bus_id) { - pr_err(PREFIX "Memory allocation error\n"); - result = -ENOMEM; - goto err_detach; - } - mutex_lock(&acpi_device_lock); - /* - * Find suitable bus_id and instance number in acpi_bus_id_list - * If failed, create one and link it into acpi_bus_id_list - */ - list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node) { - if (!strcmp(acpi_device_bus_id->bus_id, - acpi_device_hid(device))) { - acpi_device_bus_id->instance_no++; - found = 1; - kfree(new_bus_id); - break; + + acpi_device_bus_id = acpi_device_bus_id_match(acpi_device_hid(device)); + if (acpi_device_bus_id) { + result = acpi_device_set_name(device, acpi_device_bus_id); + if (result) + goto err_unlock; + } else { + acpi_device_bus_id = kzalloc(sizeof(*acpi_device_bus_id), + GFP_KERNEL); + if (!acpi_device_bus_id) { + result = -ENOMEM; + goto err_unlock; } - } - if (!found) { - acpi_device_bus_id = new_bus_id; acpi_device_bus_id->bus_id = kstrdup_const(acpi_device_hid(device), GFP_KERNEL); if (!acpi_device_bus_id->bus_id) { - pr_err(PREFIX "Memory allocation error for bus id\n"); + kfree(acpi_device_bus_id); result = -ENOMEM; - goto err_free_new_bus_id; + goto err_unlock; + } + + ida_init(&acpi_device_bus_id->instance_ida); + + result = acpi_device_set_name(device, acpi_device_bus_id); + if (result) { + kfree(acpi_device_bus_id); + goto err_unlock; } - acpi_device_bus_id->instance_no = 0; list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list); } - dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no); if (device->parent) list_add_tail(&device->node, &device->parent->children); @@ -720,13 +742,9 @@ int acpi_device_add(struct acpi_device *device, list_del(&device->node); list_del(&device->wakeup_list); - err_free_new_bus_id: - if (!found) - kfree(new_bus_id); - + err_unlock: mutex_unlock(&acpi_device_lock); - err_detach: acpi_detach_data(device->handle, acpi_scan_drop_device); return result; } diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index 811d298637cb..83cd4c95faf0 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -147,6 +147,7 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, { + .callback = video_detect_force_vendor, .ident = "Sony VPCEH3U1E", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"), diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index ecc304149067..b5f5ca4e3f34 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -299,10 +299,11 @@ static int amba_remove(struct device *dev) { struct amba_device *pcdev = to_amba_device(dev); struct amba_driver *drv = to_amba_driver(dev->driver); - int ret; + int ret = 0; pm_runtime_get_sync(dev); - ret = drv->remove(pcdev); + if (drv->remove) + ret = drv->remove(pcdev); pm_runtime_put_noidle(dev); /* Undo the runtime PM settings in amba_probe() */ @@ -319,7 +320,9 @@ static int amba_remove(struct device *dev) static void amba_shutdown(struct device *dev) { struct amba_driver *drv = to_amba_driver(dev->driver); - drv->shutdown(to_amba_device(dev)); + + if (drv->shutdown) + drv->shutdown(to_amba_device(dev)); } /** @@ -332,12 +335,13 @@ static void amba_shutdown(struct device *dev) */ int amba_driver_register(struct amba_driver *drv) { - drv->drv.bus = &amba_bustype; + if (!drv->probe) + return -EINVAL; -#define SETFN(fn) if (drv->fn) drv->drv.fn = amba_##fn - SETFN(probe); - SETFN(remove); - SETFN(shutdown); + drv->drv.bus = &amba_bustype; + drv->drv.probe = amba_probe; + drv->drv.remove = amba_remove; + drv->drv.shutdown = amba_shutdown; return driver_register(&drv->drv); } diff --git a/drivers/ata/ahci_brcm.c b/drivers/ata/ahci_brcm.c index 49f7acbfcf01..5b32df5d33ad 100644 --- a/drivers/ata/ahci_brcm.c +++ b/drivers/ata/ahci_brcm.c @@ -377,6 +377,10 @@ static int __maybe_unused brcm_ahci_resume(struct device *dev) if (ret) return ret; + ret = ahci_platform_enable_regulators(hpriv); + if (ret) + goto out_disable_clks; + brcm_sata_init(priv); brcm_sata_phys_enable(priv); brcm_sata_alpm_init(hpriv); @@ -406,6 +410,8 @@ static int __maybe_unused brcm_ahci_resume(struct device *dev) ahci_platform_disable_phys(hpriv); out_disable_phys: brcm_sata_phys_disable(priv); + ahci_platform_disable_regulators(hpriv); +out_disable_clks: ahci_platform_disable_clks(hpriv); return ret; } @@ -490,6 +496,10 @@ static int brcm_ahci_probe(struct platform_device *pdev) if (ret) goto out_reset; + ret = ahci_platform_enable_regulators(hpriv); + if (ret) + goto out_disable_clks; + /* Must be first so as to configure endianness including that * of the standard AHCI register space. */ @@ -499,7 +509,7 @@ static int brcm_ahci_probe(struct platform_device *pdev) priv->port_mask = brcm_ahci_get_portmask(hpriv, priv); if (!priv->port_mask) { ret = -ENODEV; - goto out_disable_clks; + goto out_disable_regulators; } /* Must be done before ahci_platform_enable_phys() */ @@ -524,6 +534,8 @@ static int brcm_ahci_probe(struct platform_device *pdev) ahci_platform_disable_phys(hpriv); out_disable_phys: brcm_sata_phys_disable(priv); +out_disable_regulators: + ahci_platform_disable_regulators(hpriv); out_disable_clks: ahci_platform_disable_clks(hpriv); out_reset: diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 316a9947541f..b574cce98dc3 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -2260,7 +2260,8 @@ static int eni_init_one(struct pci_dev *pci_dev, return rc; err_eni_release: - eni_do_release(dev); + dev->phy = NULL; + iounmap(ENI_DEV(dev)->ioaddr); err_unregister: atm_dev_deregister(dev); err_free_consistent: diff --git a/drivers/atm/idt77105.c b/drivers/atm/idt77105.c index 3c081b6171a8..bfca7b8a6f31 100644 --- a/drivers/atm/idt77105.c +++ b/drivers/atm/idt77105.c @@ -262,7 +262,7 @@ static int idt77105_start(struct atm_dev *dev) { unsigned long flags; - if (!(dev->dev_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL))) return -ENOMEM; PRIV(dev)->dev = dev; spin_lock_irqsave(&idt77105_priv_lock, flags); @@ -337,7 +337,7 @@ static int idt77105_stop(struct atm_dev *dev) else idt77105_all = walk->next; dev->phy = NULL; - dev->dev_data = NULL; + dev->phy_data = NULL; kfree(walk); break; } diff --git a/drivers/atm/lanai.c b/drivers/atm/lanai.c index d7277c26e423..32d7aa141d96 100644 --- a/drivers/atm/lanai.c +++ b/drivers/atm/lanai.c @@ -2233,6 +2233,7 @@ static int lanai_dev_open(struct atm_dev *atmdev) conf1_write(lanai); #endif iounmap(lanai->base); + lanai->base = NULL; error_pci: pci_disable_device(lanai->pci); error: @@ -2245,6 +2246,8 @@ static int lanai_dev_open(struct atm_dev *atmdev) static void lanai_dev_close(struct atm_dev *atmdev) { struct lanai_dev *lanai = (struct lanai_dev *) atmdev->dev_data; + if (lanai->base==NULL) + return; printk(KERN_INFO DEV_LABEL "(itf %d): shutting down interface\n", lanai->number); lanai_timed_poll_stop(lanai); @@ -2552,7 +2555,7 @@ static int lanai_init_one(struct pci_dev *pci, struct atm_dev *atmdev; int result; - lanai = kmalloc(sizeof(*lanai), GFP_KERNEL); + lanai = kzalloc(sizeof(*lanai), GFP_KERNEL); if (lanai == NULL) { printk(KERN_ERR DEV_LABEL ": couldn't allocate dev_data structure!\n"); diff --git a/drivers/atm/uPD98402.c b/drivers/atm/uPD98402.c index 7850758b5bb8..239852d85558 100644 --- a/drivers/atm/uPD98402.c +++ b/drivers/atm/uPD98402.c @@ -211,7 +211,7 @@ static void uPD98402_int(struct atm_dev *dev) static int uPD98402_start(struct atm_dev *dev) { DPRINTK("phy_start\n"); - if (!(dev->dev_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) + if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL))) return -ENOMEM; spin_lock_init(&PRIV(dev)->lock); memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats)); diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index a2b59b84bb88..1509cb74705a 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -507,6 +507,3 @@ config PANEL depends on PARPORT select AUXDISPLAY select PARPORT_PANEL - -config CHARLCD - tristate "Character LCD core support" if COMPILE_TEST diff --git a/drivers/auxdisplay/ht16k33.c b/drivers/auxdisplay/ht16k33.c index d951d54b26f5..d8602843e8a5 100644 --- a/drivers/auxdisplay/ht16k33.c +++ b/drivers/auxdisplay/ht16k33.c @@ -117,8 +117,7 @@ static void ht16k33_fb_queue(struct ht16k33_priv *priv) { struct ht16k33_fbdev *fbdev = &priv->fbdev; - schedule_delayed_work(&fbdev->work, - msecs_to_jiffies(HZ / fbdev->refresh_rate)); + schedule_delayed_work(&fbdev->work, HZ / fbdev->refresh_rate); } /* diff --git a/drivers/base/auxiliary.c b/drivers/base/auxiliary.c index 8336535f1e11..d8b314e7d0fd 100644 --- a/drivers/base/auxiliary.c +++ b/drivers/base/auxiliary.c @@ -15,6 +15,7 @@ #include #include #include +#include "base.h" static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id, const struct auxiliary_device *auxdev) @@ -260,19 +261,11 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv) } EXPORT_SYMBOL_GPL(auxiliary_driver_unregister); -static int __init auxiliary_bus_init(void) +void __init auxiliary_bus_init(void) { - return bus_register(&auxiliary_bus_type); + WARN_ON(bus_register(&auxiliary_bus_type)); } -static void __exit auxiliary_bus_exit(void) -{ - bus_unregister(&auxiliary_bus_type); -} - -module_init(auxiliary_bus_init); -module_exit(auxiliary_bus_exit); - MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("Auxiliary Bus"); MODULE_AUTHOR("David Ertman "); diff --git a/drivers/base/base.h b/drivers/base/base.h index f5600a83124f..52b3d7b75c27 100644 --- a/drivers/base/base.h +++ b/drivers/base/base.h @@ -119,6 +119,11 @@ static inline int hypervisor_init(void) { return 0; } extern int platform_bus_init(void); extern void cpu_dev_init(void); extern void container_dev_init(void); +#ifdef CONFIG_AUXILIARY_BUS +extern void auxiliary_bus_init(void); +#else +static inline void auxiliary_bus_init(void) { } +#endif struct kobject *virtual_device_parent(struct device *dev); diff --git a/drivers/base/init.c b/drivers/base/init.c index 908e6520e804..a9f57c22fb9e 100644 --- a/drivers/base/init.c +++ b/drivers/base/init.c @@ -32,6 +32,7 @@ void __init driver_init(void) */ of_core_init(); platform_bus_init(); + auxiliary_bus_init(); cpu_dev_init(); memory_dev_init(); container_dev_init(); diff --git a/drivers/base/memory.c b/drivers/base/memory.c index eef4ffb6122c..de058d15b33e 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -290,20 +290,20 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr, } /* - * phys_device is a bad name for this. What I really want - * is a way to differentiate between memory ranges that - * are part of physical devices that constitute - * a complete removable unit or fru. - * i.e. do these ranges belong to the same physical device, - * s.t. if I offline all of these sections I can then - * remove the physical device? + * Legacy interface that we cannot remove: s390x exposes the storage increment + * covered by a memory block, allowing for identifying which memory blocks + * comprise a storage increment. Since a memory block spans complete + * storage increments nowadays, this interface is basically unused. Other + * archs never exposed != 0. */ static ssize_t phys_device_show(struct device *dev, struct device_attribute *attr, char *buf) { struct memory_block *mem = to_memory_block(dev); + unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); - return sysfs_emit(buf, "%d\n", mem->phys_device); + return sysfs_emit(buf, "%d\n", + arch_get_memory_phys_device(start_pfn)); } #ifdef CONFIG_MEMORY_HOTREMOVE @@ -488,11 +488,7 @@ static DEVICE_ATTR_WO(soft_offline_page); static DEVICE_ATTR_WO(hard_offline_page); #endif -/* - * Note that phys_device is optional. It is here to allow for - * differentiation between which *physical* devices each - * section belongs to... - */ +/* See phys_device_show(). */ int __weak arch_get_memory_phys_device(unsigned long start_pfn) { return 0; @@ -574,7 +570,6 @@ int register_memory(struct memory_block *memory) static int init_memory_block(unsigned long block_id, unsigned long state) { struct memory_block *mem; - unsigned long start_pfn; int ret = 0; mem = find_memory_block_by_id(block_id); @@ -588,8 +583,6 @@ static int init_memory_block(unsigned long block_id, unsigned long state) mem->start_section_nr = block_id * sections_per_block; mem->state = state; - start_pfn = section_nr_to_pfn(mem->start_section_nr); - mem->phys_device = arch_get_memory_phys_device(start_pfn); mem->nid = NUMA_NO_NODE; ret = register_memory(mem); diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c index bfda153b1a41..5ef67bacb585 100644 --- a/drivers/base/power/runtime.c +++ b/drivers/base/power/runtime.c @@ -305,7 +305,7 @@ static int rpm_get_suppliers(struct device *dev) return 0; } -static void rpm_put_suppliers(struct device *dev) +static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend) { struct device_link *link; @@ -313,10 +313,30 @@ static void rpm_put_suppliers(struct device *dev) device_links_read_lock_held()) { while (refcount_dec_not_one(&link->rpm_active)) - pm_runtime_put(link->supplier); + pm_runtime_put_noidle(link->supplier); + + if (try_to_suspend) + pm_request_idle(link->supplier); } } +static void rpm_put_suppliers(struct device *dev) +{ + __rpm_put_suppliers(dev, true); +} + +static void rpm_suspend_suppliers(struct device *dev) +{ + struct device_link *link; + int idx = device_links_read_lock(); + + list_for_each_entry_rcu(link, &dev->links.suppliers, c_node, + device_links_read_lock_held()) + pm_request_idle(link->supplier); + + device_links_read_unlock(idx); +} + /** * __rpm_callback - Run a given runtime PM callback for a given device. * @cb: Runtime PM callback to run. @@ -344,8 +364,10 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) idx = device_links_read_lock(); retval = rpm_get_suppliers(dev); - if (retval) + if (retval) { + rpm_put_suppliers(dev); goto fail; + } device_links_read_unlock(idx); } @@ -368,9 +390,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev) || (dev->power.runtime_status == RPM_RESUMING && retval))) { idx = device_links_read_lock(); - fail: - rpm_put_suppliers(dev); + __rpm_put_suppliers(dev, false); +fail: device_links_read_unlock(idx); } @@ -642,8 +664,11 @@ static int rpm_suspend(struct device *dev, int rpmflags) goto out; } + if (dev->power.irq_safe) + goto out; + /* Maybe the parent is now able to suspend. */ - if (parent && !parent->power.ignore_children && !dev->power.irq_safe) { + if (parent && !parent->power.ignore_children) { spin_unlock(&dev->power.lock); spin_lock(&parent->power.lock); @@ -652,6 +677,14 @@ static int rpm_suspend(struct device *dev, int rpmflags) spin_lock(&dev->power.lock); } + /* Maybe the suppliers are now able to suspend. */ + if (dev->power.links_count > 0) { + spin_unlock_irq(&dev->power.lock); + + rpm_suspend_suppliers(dev); + + spin_lock_irq(&dev->power.lock); + } out: trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval); diff --git a/drivers/base/regmap/regmap-sdw.c b/drivers/base/regmap/regmap-sdw.c index c83be26434e7..966de8a136d9 100644 --- a/drivers/base/regmap/regmap-sdw.c +++ b/drivers/base/regmap/regmap-sdw.c @@ -13,7 +13,7 @@ static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val) struct device *dev = context; struct sdw_slave *slave = dev_to_sdw_dev(dev); - return sdw_write(slave, reg, val); + return sdw_write_no_pm(slave, reg, val); } static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) @@ -22,7 +22,7 @@ static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val) struct sdw_slave *slave = dev_to_sdw_dev(dev); int read; - read = sdw_read(slave, reg); + read = sdw_read_no_pm(slave, reg); if (read < 0) return read; diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 4a4b2008fbc2..fbfb01ff1856 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -443,14 +443,18 @@ software_node_get_next_child(const struct fwnode_handle *fwnode, struct swnode *c = to_swnode(child); if (!p || list_empty(&p->children) || - (c && list_is_last(&c->entry, &p->children))) + (c && list_is_last(&c->entry, &p->children))) { + fwnode_handle_put(child); return NULL; + } if (c) c = list_next_entry(c, entry); else c = list_first_entry(&p->children, struct swnode, entry); - return &c->fwnode; + + fwnode_handle_put(child); + return fwnode_handle_get(&c->fwnode); } static struct fwnode_handle * @@ -782,6 +786,9 @@ int software_node_register(const struct software_node *node) if (software_node_to_swnode(node)) return -EEXIST; + if (node->parent && !parent) + return -EINVAL; + return PTR_ERR_OR_ZERO(swnode_register(node, parent, 0)); } EXPORT_SYMBOL_GPL(software_node_register); diff --git a/drivers/base/test/Makefile b/drivers/base/test/Makefile index 3ca56367c84b..2f15fae8625f 100644 --- a/drivers/base/test/Makefile +++ b/drivers/base/test/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE) += test_async_driver_probe.o obj-$(CONFIG_KUNIT_DRIVER_PE_TEST) += property-entry-test.o +CFLAGS_REMOVE_property-entry-test.o += -fplugin-arg-structleak_plugin-byref -fplugin-arg-structleak_plugin-byref-all diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index dfe1dfc901cc..0b71292d9d5a 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -4121,23 +4121,23 @@ static int floppy_open(struct block_device *bdev, fmode_t mode) if (fdc_state[FDC(drive)].rawcmd == 1) fdc_state[FDC(drive)].rawcmd = 2; - if (!(mode & FMODE_NDELAY)) { - if (mode & (FMODE_READ|FMODE_WRITE)) { - drive_state[drive].last_checked = 0; - clear_bit(FD_OPEN_SHOULD_FAIL_BIT, - &drive_state[drive].flags); - if (bdev_check_media_change(bdev)) - floppy_revalidate(bdev->bd_disk); - if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) - goto out; - if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) - goto out; - } - res = -EROFS; - if ((mode & FMODE_WRITE) && - !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) + if (mode & (FMODE_READ|FMODE_WRITE)) { + drive_state[drive].last_checked = 0; + clear_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags); + if (bdev_check_media_change(bdev)) + floppy_revalidate(bdev->bd_disk); + if (test_bit(FD_DISK_CHANGED_BIT, &drive_state[drive].flags)) + goto out; + if (test_bit(FD_OPEN_SHOULD_FAIL_BIT, &drive_state[drive].flags)) goto out; } + + res = -EROFS; + + if ((mode & FMODE_WRITE) && + !test_bit(FD_DISK_WRITABLE_BIT, &drive_state[drive].flags)) + goto out; + mutex_unlock(&open_lock); mutex_unlock(&floppy_mutex); return 0; diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index e6ea5d344f87..0f3bab47c0d6 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -78,8 +78,7 @@ struct link_dead_args { #define NBD_RT_HAS_PID_FILE 3 #define NBD_RT_HAS_CONFIG_REF 4 #define NBD_RT_BOUND 5 -#define NBD_RT_DESTROY_ON_DISCONNECT 6 -#define NBD_RT_DISCONNECT_ON_CLOSE 7 +#define NBD_RT_DISCONNECT_ON_CLOSE 6 #define NBD_DESTROY_ON_DISCONNECT 0 #define NBD_DISCONNECT_REQUESTED 1 @@ -1924,12 +1923,21 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { - set_bit(NBD_RT_DESTROY_ON_DISCONNECT, - &config->runtime_flags); - set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags); - put_dev = true; + /* + * We have 1 ref to keep the device around, and then 1 + * ref for our current operation here, which will be + * inherited by the config. If we already have + * DESTROY_ON_DISCONNECT set then we know we don't have + * that extra ref already held so we don't need the + * put_dev. + */ + if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &nbd->flags)) + put_dev = true; } else { - clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags); + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &nbd->flags)) + refcount_inc(&nbd->refs); } if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { set_bit(NBD_RT_DISCONNECT_ON_CLOSE, @@ -2100,15 +2108,13 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) { u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]); if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) { - if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT, - &config->runtime_flags)) + if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT, + &nbd->flags)) put_dev = true; - set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags); } else { - if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT, - &config->runtime_flags)) + if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT, + &nbd->flags)) refcount_inc(&nbd->refs); - clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags); } if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) { diff --git a/drivers/block/rsxx/core.c b/drivers/block/rsxx/core.c index 63f549889f87..227e1be4c6f9 100644 --- a/drivers/block/rsxx/core.c +++ b/drivers/block/rsxx/core.c @@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf, { struct rsxx_cardinfo *card = file_inode(fp)->i_private; char *buf; - ssize_t st; + int st; buf = kzalloc(cnt, GFP_KERNEL); if (!buf) return -ENOMEM; st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1); - if (!st) - st = copy_to_user(ubuf, buf, cnt); + if (!st) { + if (copy_to_user(ubuf, buf, cnt)) + st = -EFAULT; + } kfree(buf); if (st) return st; @@ -869,6 +871,7 @@ static int rsxx_pci_probe(struct pci_dev *dev, card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event"); if (!card->event_wq) { dev_err(CARD_TO_DEV(card), "Failed card event setup.\n"); + st = -ENOMEM; goto failed_event_handler; } diff --git a/drivers/block/umem.c b/drivers/block/umem.c index 2b95d7b33b91..5eb44e4a91ee 100644 --- a/drivers/block/umem.c +++ b/drivers/block/umem.c @@ -877,6 +877,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) if (card->mm_pages[0].desc == NULL || card->mm_pages[1].desc == NULL) { dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n"); + ret = -ENOMEM; goto failed_alloc; } reset_page(&card->mm_pages[0]); @@ -888,8 +889,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) spin_lock_init(&card->lock); card->queue = blk_alloc_queue(NUMA_NO_NODE); - if (!card->queue) + if (!card->queue) { + ret = -ENOMEM; goto failed_alloc; + } tasklet_init(&card->tasklet, process_page, (unsigned long)card); diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index 9ebf53903d7b..3874233f7194 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -794,8 +794,13 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, pages[i]->persistent_gnt = persistent_gnt; } else { if (gnttab_page_cache_get(&ring->free_pages, - &pages[i]->page)) - goto out_of_memory; + &pages[i]->page)) { + gnttab_page_cache_put(&ring->free_pages, + pages_to_gnt, + segs_to_map); + ret = -ENOMEM; + goto out; + } addr = vaddr(pages[i]->page); pages_to_gnt[segs_to_map] = pages[i]->page; pages[i]->persistent_gnt = NULL; @@ -811,10 +816,8 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, break; } - if (segs_to_map) { + if (segs_to_map) ret = gnttab_map_refs(map, NULL, pages_to_gnt, segs_to_map); - BUG_ON(ret); - } /* * Now swizzle the MFN in our domain with the MFN from the other domain @@ -830,7 +833,7 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, gnttab_page_cache_put(&ring->free_pages, &pages[seg_idx]->page, 1); pages[seg_idx]->handle = BLKBACK_INVALID_HANDLE; - ret |= 1; + ret |= !ret; goto next; } pages[seg_idx]->handle = map[new_map_idx].handle; @@ -882,17 +885,18 @@ static int xen_blkbk_map(struct xen_blkif_ring *ring, } segs_to_map = 0; last_map = map_until; - if (map_until != num) + if (!ret && map_until != num) goto again; - return ret; - -out_of_memory: - pr_alert("%s: out of memory\n", __func__); - gnttab_page_cache_put(&ring->free_pages, pages_to_gnt, segs_to_map); - for (i = last_map; i < num; i++) +out: + for (i = last_map; i < num; i++) { + /* Don't zap current batch's valid persistent grants. */ + if(i >= map_until) + pages[i]->persistent_gnt = NULL; pages[i]->handle = BLKBACK_INVALID_HANDLE; - return -ENOMEM; + } + + return ret; } static int xen_blkbk_map_seg(struct pending_req *pending_req) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e2933cb7a82a..37d11103a706 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -628,7 +628,7 @@ static ssize_t writeback_store(struct device *dev, struct bio_vec bio_vec; struct page *page; ssize_t ret = len; - int mode; + int mode, err; unsigned long blk_idx = 0; if (sysfs_streq(buf, "idle")) @@ -639,8 +639,8 @@ static ssize_t writeback_store(struct device *dev, if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1)) return -EINVAL; - ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index); - if (ret || index >= nr_pages) + if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) || + index >= nr_pages) return -EINVAL; nr_pages = 1; @@ -664,7 +664,7 @@ static ssize_t writeback_store(struct device *dev, goto release_init_lock; } - while (nr_pages--) { + for (; nr_pages != 0; index++, nr_pages--) { struct bio_vec bvec; bvec.bv_page = page; @@ -729,12 +729,17 @@ static ssize_t writeback_store(struct device *dev, * XXX: A single page IO would be inefficient for write * but it would be not bad as starter. */ - ret = submit_bio_wait(&bio); - if (ret) { + err = submit_bio_wait(&bio); + if (err) { zram_slot_lock(zram, index); zram_clear_flag(zram, index, ZRAM_UNDER_WB); zram_clear_flag(zram, index, ZRAM_IDLE); zram_slot_unlock(zram, index); + /* + * Return last IO error unless every IO were + * not suceeded. + */ + ret = err; continue; } @@ -1082,7 +1087,7 @@ static ssize_t mm_stat_show(struct device *dev, zram->limit_pages << PAGE_SHIFT, max_used << PAGE_SHIFT, (u64)atomic64_read(&zram->stats.same_pages), - pool_stats.pages_compacted, + atomic_long_read(&pool_stats.pages_compacted), (u64)atomic64_read(&zram->stats.huge_pages), (u64)atomic64_read(&zram->stats.huge_pages_since)); up_read(&zram->init_lock); diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c index 98d53764871f..2acb719e596f 100644 --- a/drivers/bluetooth/btqcomsmd.c +++ b/drivers/bluetooth/btqcomsmd.c @@ -142,12 +142,16 @@ static int btqcomsmd_probe(struct platform_device *pdev) btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", btqcomsmd_cmd_callback, btq); - if (IS_ERR(btq->cmd_channel)) - return PTR_ERR(btq->cmd_channel); + if (IS_ERR(btq->cmd_channel)) { + ret = PTR_ERR(btq->cmd_channel); + goto destroy_acl_channel; + } hdev = hci_alloc_dev(); - if (!hdev) - return -ENOMEM; + if (!hdev) { + ret = -ENOMEM; + goto destroy_cmd_channel; + } hci_set_drvdata(hdev, btq); btq->hdev = hdev; @@ -161,14 +165,21 @@ static int btqcomsmd_probe(struct platform_device *pdev) hdev->set_bdaddr = qca_set_bdaddr_rome; ret = hci_register_dev(hdev); - if (ret < 0) { - hci_free_dev(hdev); - return ret; - } + if (ret < 0) + goto hci_free_dev; platform_set_drvdata(pdev, btq); return 0; + +hci_free_dev: + hci_free_dev(hdev); +destroy_cmd_channel: + rpmsg_destroy_ept(btq->cmd_channel); +destroy_acl_channel: + rpmsg_destroy_ept(btq->acl_channel); + + return ret; } static int btqcomsmd_remove(struct platform_device *pdev) diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 03b83aa91277..a4f834a50a98 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -506,7 +506,6 @@ static const struct dmi_system_id btusb_needs_reset_resume_table[] = { #define BTUSB_HW_RESET_ACTIVE 12 #define BTUSB_TX_WAIT_VND_EVT 13 #define BTUSB_WAKEUP_DISABLE 14 -#define BTUSB_USE_ALT1_FOR_WBS 15 struct btusb_data { struct hci_dev *hdev; @@ -1736,15 +1735,12 @@ static void btusb_work(struct work_struct *work) new_alts = data->sco_num; } } else if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_TRANSP) { - /* Check if Alt 6 is supported for Transparent audio */ - if (btusb_find_altsetting(data, 6)) { - data->usb_alt6_packet_flow = true; - new_alts = 6; - } else if (test_bit(BTUSB_USE_ALT1_FOR_WBS, &data->flags)) { - new_alts = 1; - } else { - bt_dev_err(hdev, "Device does not support ALT setting 6"); - } + /* Bluetooth USB spec recommends alt 6 (63 bytes), but + * many adapters do not support it. Alt 1 appears to + * work for all adapters that do not have alt 6, and + * which work with WBS at all. + */ + new_alts = btusb_find_altsetting(data, 6) ? 6 : 1; } if (btusb_switch_alt_setting(hdev, new_alts) < 0) @@ -3199,7 +3195,7 @@ static void btusb_mtk_wmt_recv(struct urb *urb) skb = bt_skb_alloc(HCI_WMT_MAX_EVENT_SIZE, GFP_ATOMIC); if (!skb) { hdev->stat.err_rx++; - goto err_out; + return; } hci_skb_pkt_type(skb) = HCI_EVENT_PKT; @@ -3217,13 +3213,18 @@ static void btusb_mtk_wmt_recv(struct urb *urb) */ if (test_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { data->evt_skb = skb_clone(skb, GFP_ATOMIC); - if (!data->evt_skb) - goto err_out; + if (!data->evt_skb) { + kfree_skb(skb); + return; + } } err = hci_recv_frame(hdev, skb); - if (err < 0) - goto err_free_skb; + if (err < 0) { + kfree_skb(data->evt_skb); + data->evt_skb = NULL; + return; + } if (test_and_clear_bit(BTUSB_TX_WAIT_VND_EVT, &data->flags)) { @@ -3232,11 +3233,6 @@ static void btusb_mtk_wmt_recv(struct urb *urb) wake_up_bit(&data->flags, BTUSB_TX_WAIT_VND_EVT); } -err_out: - return; -err_free_skb: - kfree_skb(data->evt_skb); - data->evt_skb = NULL; return; } else if (urb->status == -ENOENT) { /* Avoid suspend failed when usb_kill_urb */ @@ -4069,6 +4065,13 @@ static int btusb_setup_qca(struct hci_dev *hdev) info = &qca_devices_table[i]; } if (!info) { + /* If the rom_version is not matched in the qca_devices_table + * and the high ROM version is not zero, we assume this chip no + * need to load the rampatch and nvm. + */ + if (ver_rom & ~0xffffU) + return 0; + bt_dev_err(hdev, "don't support firmware rome 0x%x", ver_rom); return -ENODEV; } @@ -4548,10 +4551,6 @@ static int btusb_probe(struct usb_interface *intf, * (DEVICE_REMOTE_WAKEUP) */ set_bit(BTUSB_WAKEUP_DISABLE, &data->flags); - if (btusb_find_altsetting(data, 1)) - set_bit(BTUSB_USE_ALT1_FOR_WBS, &data->flags); - else - bt_dev_err(hdev, "Device does not support ALT setting 1"); } if (!reset) diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c index 7be16a7f653b..95ecd30e6619 100644 --- a/drivers/bluetooth/hci_h5.c +++ b/drivers/bluetooth/hci_h5.c @@ -906,6 +906,11 @@ static int h5_btrtl_setup(struct h5 *h5) /* Give the device some time before the hci-core sends it a reset */ usleep_range(10000, 20000); + /* Enable controller to do both LE scan and BR/EDR inquiry + * simultaneously. + */ + set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &h5->hu->hdev->quirks); + out_free: btrtl_free(btrtl_dev); diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index f83d67eafc9f..637c5b8c2aa1 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -127,10 +127,9 @@ int hci_uart_tx_wakeup(struct hci_uart *hu) if (!test_bit(HCI_UART_PROTO_READY, &hu->flags)) goto no_schedule; - if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) { - set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); + set_bit(HCI_UART_TX_WAKEUP, &hu->tx_state); + if (test_and_set_bit(HCI_UART_SENDING, &hu->tx_state)) goto no_schedule; - } BT_DBG(""); @@ -174,10 +173,10 @@ static void hci_uart_write_work(struct work_struct *work) kfree_skb(skb); } + clear_bit(HCI_UART_SENDING, &hu->tx_state); if (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)) goto restart; - clear_bit(HCI_UART_SENDING, &hu->tx_state); wake_up_bit(&hu->tx_state, HCI_UART_SENDING); } @@ -802,7 +801,8 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, * We don't provide read/write/poll interface for user space. */ static ssize_t hci_uart_tty_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) { return 0; } @@ -819,29 +819,28 @@ static __poll_t hci_uart_tty_poll(struct tty_struct *tty, return 0; } +static struct tty_ldisc_ops hci_uart_ldisc = { + .owner = THIS_MODULE, + .magic = TTY_LDISC_MAGIC, + .name = "n_hci", + .open = hci_uart_tty_open, + .close = hci_uart_tty_close, + .read = hci_uart_tty_read, + .write = hci_uart_tty_write, + .ioctl = hci_uart_tty_ioctl, + .compat_ioctl = hci_uart_tty_ioctl, + .poll = hci_uart_tty_poll, + .receive_buf = hci_uart_tty_receive, + .write_wakeup = hci_uart_tty_wakeup, +}; + static int __init hci_uart_init(void) { - static struct tty_ldisc_ops hci_uart_ldisc; int err; BT_INFO("HCI UART driver ver %s", VERSION); /* Register the tty discipline */ - - memset(&hci_uart_ldisc, 0, sizeof(hci_uart_ldisc)); - hci_uart_ldisc.magic = TTY_LDISC_MAGIC; - hci_uart_ldisc.name = "n_hci"; - hci_uart_ldisc.open = hci_uart_tty_open; - hci_uart_ldisc.close = hci_uart_tty_close; - hci_uart_ldisc.read = hci_uart_tty_read; - hci_uart_ldisc.write = hci_uart_tty_write; - hci_uart_ldisc.ioctl = hci_uart_tty_ioctl; - hci_uart_ldisc.compat_ioctl = hci_uart_tty_ioctl; - hci_uart_ldisc.poll = hci_uart_tty_poll; - hci_uart_ldisc.receive_buf = hci_uart_tty_receive; - hci_uart_ldisc.write_wakeup = hci_uart_tty_wakeup; - hci_uart_ldisc.owner = THIS_MODULE; - err = tty_register_ldisc(N_HCI, &hci_uart_ldisc); if (err) { BT_ERR("HCI line discipline registration failed. (%d)", err); diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 4a963682c702..de36af63e182 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -50,7 +50,8 @@ #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000 #define CMD_TRANS_TIMEOUT_MS 100 #define MEMDUMP_TIMEOUT_MS 8000 -#define IBS_DISABLE_SSR_TIMEOUT_MS (MEMDUMP_TIMEOUT_MS + 1000) +#define IBS_DISABLE_SSR_TIMEOUT_MS \ + (MEMDUMP_TIMEOUT_MS + FW_DOWNLOAD_TIMEOUT_MS) #define FW_DOWNLOAD_TIMEOUT_MS 3000 /* susclk rate */ @@ -76,7 +77,8 @@ enum qca_flags { QCA_MEMDUMP_COLLECTION, QCA_HW_ERROR_EVENT, QCA_SSR_TRIGGERED, - QCA_BT_OFF + QCA_BT_OFF, + QCA_ROM_FW }; enum qca_capabilities { @@ -1024,7 +1026,9 @@ static void qca_controller_memdump(struct work_struct *work) dump_size = __le32_to_cpu(dump->dump_size); if (!(dump_size)) { bt_dev_err(hu->hdev, "Rx invalid memdump size"); + kfree(qca_memdump); kfree_skb(skb); + qca->qca_memdump = NULL; mutex_unlock(&qca->hci_memdump_lock); return; } @@ -1661,6 +1665,7 @@ static int qca_setup(struct hci_uart *hu) if (ret) return ret; + clear_bit(QCA_ROM_FW, &qca->flags); /* Patch downloading has to be done without IBS mode */ set_bit(QCA_IBS_DISABLED, &qca->flags); @@ -1718,12 +1723,14 @@ static int qca_setup(struct hci_uart *hu) hu->hdev->cmd_timeout = qca_cmd_timeout; } else if (ret == -ENOENT) { /* No patch/nvm-config found, run with original fw/config */ + set_bit(QCA_ROM_FW, &qca->flags); ret = 0; } else if (ret == -EAGAIN) { /* * Userspace firmware loader will return -EAGAIN in case no * patch/nvm-config is found, so run with original fw/config. */ + set_bit(QCA_ROM_FW, &qca->flags); ret = 0; } @@ -2100,17 +2107,29 @@ static int __maybe_unused qca_suspend(struct device *dev) set_bit(QCA_SUSPENDING, &qca->flags); - if (test_bit(QCA_BT_OFF, &qca->flags)) + /* if BT SoC is running with default firmware then it does not + * support in-band sleep + */ + if (test_bit(QCA_ROM_FW, &qca->flags)) + return 0; + + /* During SSR after memory dump collection, controller will be + * powered off and then powered on.If controller is powered off + * during SSR then we should wait until SSR is completed. + */ + if (test_bit(QCA_BT_OFF, &qca->flags) && + !test_bit(QCA_SSR_TRIGGERED, &qca->flags)) return 0; - if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { + if (test_bit(QCA_IBS_DISABLED, &qca->flags) || + test_bit(QCA_SSR_TRIGGERED, &qca->flags)) { wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? IBS_DISABLE_SSR_TIMEOUT_MS : FW_DOWNLOAD_TIMEOUT_MS; /* QCA_IBS_DISABLED flag is set to true, During FW download * and during memory dump collection. It is reset to false, - * After FW download complete and after memory dump collections. + * After FW download complete. */ wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); @@ -2122,10 +2141,6 @@ static int __maybe_unused qca_suspend(struct device *dev) } } - /* After memory dump collection, Controller is powered off.*/ - if (test_bit(QCA_BT_OFF, &qca->flags)) - return 0; - cancel_work_sync(&qca->ws_awake_device); cancel_work_sync(&qca->ws_awake_rx); diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c index ef96ad06fa54..9e03402ef1b3 100644 --- a/drivers/bluetooth/hci_serdev.c +++ b/drivers/bluetooth/hci_serdev.c @@ -83,9 +83,9 @@ static void hci_uart_write_work(struct work_struct *work) hci_uart_tx_complete(hu, hci_skb_pkt_type(skb)); kfree_skb(skb); } - } while (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)); - clear_bit(HCI_UART_SENDING, &hu->tx_state); + clear_bit(HCI_UART_SENDING, &hu->tx_state); + } while (test_bit(HCI_UART_TX_WAKEUP, &hu->tx_state)); } /* ------- Interface to HCI layer ------ */ diff --git a/drivers/bus/mhi/core/init.c b/drivers/bus/mhi/core/init.c index f0697f433c2f..08c45457c90f 100644 --- a/drivers/bus/mhi/core/init.c +++ b/drivers/bus/mhi/core/init.c @@ -552,6 +552,9 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl, tre_ring = &mhi_chan->tre_ring; chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan]; + if (!chan_ctxt->rbase) /* Already uninitialized */ + return; + mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size, tre_ring->pre_aligned, tre_ring->dma_handle); vfree(buf_ring->base); diff --git a/drivers/bus/omap_l3_noc.c b/drivers/bus/omap_l3_noc.c index b040447575ad..dcfb32ee5cb6 100644 --- a/drivers/bus/omap_l3_noc.c +++ b/drivers/bus/omap_l3_noc.c @@ -285,7 +285,7 @@ static int omap_l3_probe(struct platform_device *pdev) */ l3->debug_irq = platform_get_irq(pdev, 0); ret = devm_request_irq(l3->dev, l3->debug_irq, l3_interrupt_handler, - 0x0, "l3-dbg-irq", l3); + IRQF_NO_THREAD, "l3-dbg-irq", l3); if (ret) { dev_err(l3->dev, "request_irq failed for %d\n", l3->debug_irq); @@ -294,7 +294,7 @@ static int omap_l3_probe(struct platform_device *pdev) l3->app_irq = platform_get_irq(pdev, 1); ret = devm_request_irq(l3->dev, l3->app_irq, l3_interrupt_handler, - 0x0, "l3-app-irq", l3); + IRQF_NO_THREAD, "l3-app-irq", l3); if (ret) dev_err(l3->dev, "request_irq failed for %d\n", l3->app_irq); diff --git a/drivers/char/hw_random/ingenic-trng.c b/drivers/char/hw_random/ingenic-trng.c index 954a8411d67d..0eb80f786f4d 100644 --- a/drivers/char/hw_random/ingenic-trng.c +++ b/drivers/char/hw_random/ingenic-trng.c @@ -113,13 +113,17 @@ static int ingenic_trng_probe(struct platform_device *pdev) ret = hwrng_register(&trng->rng); if (ret) { dev_err(&pdev->dev, "Failed to register hwrng\n"); - return ret; + goto err_unprepare_clk; } platform_set_drvdata(pdev, trng); dev_info(&pdev->dev, "Ingenic DTRNG driver registered\n"); return 0; + +err_unprepare_clk: + clk_disable_unprepare(trng->clk); + return ret; } static int ingenic_trng_remove(struct platform_device *pdev) diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index e262445fed5f..f35f0f31f52a 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -69,7 +69,7 @@ static int timeriomem_rng_read(struct hwrng *hwrng, void *data, */ if (retval > 0) usleep_range(period_us, - period_us + min(1, period_us / 100)); + period_us + max(1, period_us / 100)); *(u32 *)data = readl(priv->io_base); retval += sizeof(u32); diff --git a/drivers/char/random.c b/drivers/char/random.c index 5f3b8ac9d97b..a894c0559a8c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1972,7 +1972,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg) return -EPERM; if (crng_init < 2) return -ENODATA; - crng_reseed(&primary_crng, NULL); + crng_reseed(&primary_crng, &input_pool); crng_global_init_time = jiffies - 1; return 0; default: diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 947d1db0a5cc..283f78211c3a 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -164,8 +164,6 @@ extern const struct file_operations tpmrm_fops; extern struct idr dev_nums_idr; ssize_t tpm_transmit(struct tpm_chip *chip, u8 *buf, size_t bufsiz); -ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_buf *buf, - size_t min_rsp_body_length, const char *desc); int tpm_get_timeouts(struct tpm_chip *); int tpm_auto_startup(struct tpm_chip *chip); @@ -194,8 +192,6 @@ static inline void tpm_msleep(unsigned int delay_msec) int tpm_chip_start(struct tpm_chip *chip); void tpm_chip_stop(struct tpm_chip *chip); struct tpm_chip *tpm_find_get_ops(struct tpm_chip *chip); -__must_check int tpm_try_get_ops(struct tpm_chip *chip); -void tpm_put_ops(struct tpm_chip *chip); struct tpm_chip *tpm_chip_alloc(struct device *dev, const struct tpm_class_ops *ops); diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 92c51c6cfd1b..a2e0395cbe61 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -125,7 +125,8 @@ static bool check_locality(struct tpm_chip *chip, int l) if (rc < 0) return false; - if ((access & (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) == + if ((access & (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID + | TPM_ACCESS_REQUEST_USE)) == (TPM_ACCESS_ACTIVE_LOCALITY | TPM_ACCESS_VALID)) { priv->locality = l; return true; @@ -134,58 +135,13 @@ static bool check_locality(struct tpm_chip *chip, int l) return false; } -static bool locality_inactive(struct tpm_chip *chip, int l) -{ - struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev); - int rc; - u8 access; - - rc = tpm_tis_read8(priv, TPM_ACCESS(l), &access); - if (rc < 0) - return false; - - if ((access & (TPM_ACCESS_VALID | TPM_ACCESS_ACTIVE_LOCALITY)) - == TPM_ACCESS_VALID) - return true; - - return false; -} - static int release_locality(struct tpm_chip *chip, int l) { struct tpm_tis_data *priv = dev_get_drvdata(&chip->dev); - unsigned long stop, timeout; - long rc; tpm_tis_write8(priv, TPM_ACCESS(l), TPM_ACCESS_ACTIVE_LOCALITY); - stop = jiffies + chip->timeout_a; - - if (chip->flags & TPM_CHIP_FLAG_IRQ) { -again: - timeout = stop - jiffies; - if ((long)timeout <= 0) - return -1; - - rc = wait_event_interruptible_timeout(priv->int_queue, - (locality_inactive(chip, l)), - timeout); - - if (rc > 0) - return 0; - - if (rc == -ERESTARTSYS && freezing(current)) { - clear_thread_flag(TIF_SIGPENDING); - goto again; - } - } else { - do { - if (locality_inactive(chip, l)) - return 0; - tpm_msleep(TPM_TIMEOUT); - } while (time_before(jiffies, stop)); - } - return -1; + return 0; } static int request_locality(struct tpm_chip *chip, int l) @@ -751,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip) const char *desc = "attempting to generate an interrupt"; u32 cap2; cap_t cap; + int ret; + /* TPM 2.0 */ if (chip->flags & TPM_CHIP_FLAG_TPM2) return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc); - else - return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, - 0); + + /* TPM 1.2 */ + ret = request_locality(chip, 0); + if (ret < 0) + return ret; + + ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0); + + release_locality(chip, 0); + + return ret; } /* Register the IRQ and issue a command that will cause an interrupt. If an @@ -1063,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, init_waitqueue_head(&priv->read_queue); init_waitqueue_head(&priv->int_queue); if (irq != -1) { - /* Before doing irq testing issue a command to the TPM in polling mode + /* + * Before doing irq testing issue a command to the TPM in polling mode * to make sure it works. May as well use that command to set the * proper timeouts for the driver. */ - if (tpm_get_timeouts(chip)) { + + rc = request_locality(chip, 0); + if (rc < 0) + goto out_err; + + rc = tpm_get_timeouts(chip); + + release_locality(chip, 0); + + if (rc) { dev_err(dev, "Could not get TPM timeouts and durations\n"); rc = -ENODEV; goto out_err; diff --git a/drivers/clk/clk-ast2600.c b/drivers/clk/clk-ast2600.c index 177368cac6dd..a55b37fc2c8b 100644 --- a/drivers/clk/clk-ast2600.c +++ b/drivers/clk/clk-ast2600.c @@ -17,7 +17,8 @@ #define ASPEED_G6_NUM_CLKS 71 -#define ASPEED_G6_SILICON_REV 0x004 +#define ASPEED_G6_SILICON_REV 0x014 +#define CHIP_REVISION_ID GENMASK(23, 16) #define ASPEED_G6_RESET_CTRL 0x040 #define ASPEED_G6_RESET_CTRL2 0x050 @@ -190,18 +191,34 @@ static struct clk_hw *ast2600_calc_pll(const char *name, u32 val) static struct clk_hw *ast2600_calc_apll(const char *name, u32 val) { unsigned int mult, div; + u32 chip_id = readl(scu_g6_base + ASPEED_G6_SILICON_REV); - if (val & BIT(20)) { - /* Pass through mode */ - mult = div = 1; + if (((chip_id & CHIP_REVISION_ID) >> 16) >= 2) { + if (val & BIT(24)) { + /* Pass through mode */ + mult = div = 1; + } else { + /* F = 25Mhz * [(m + 1) / (n + 1)] / (p + 1) */ + u32 m = val & 0x1fff; + u32 n = (val >> 13) & 0x3f; + u32 p = (val >> 19) & 0xf; + + mult = (m + 1); + div = (n + 1) * (p + 1); + } } else { - /* F = 25Mhz * (2-od) * [(m + 2) / (n + 1)] */ - u32 m = (val >> 5) & 0x3f; - u32 od = (val >> 4) & 0x1; - u32 n = val & 0xf; + if (val & BIT(20)) { + /* Pass through mode */ + mult = div = 1; + } else { + /* F = 25Mhz * (2-od) * [(m + 2) / (n + 1)] */ + u32 m = (val >> 5) & 0x3f; + u32 od = (val >> 4) & 0x1; + u32 n = val & 0xf; - mult = (2 - od) * (m + 2); - div = n + 1; + mult = (2 - od) * (m + 2); + div = n + 1; + } } return clk_hw_register_fixed_factor(NULL, name, "clkin", 0, mult, div); diff --git a/drivers/clk/clk-divider.c b/drivers/clk/clk-divider.c index c499799693cc..344997203f0e 100644 --- a/drivers/clk/clk-divider.c +++ b/drivers/clk/clk-divider.c @@ -494,8 +494,13 @@ struct clk_hw *__clk_hw_register_divider(struct device *dev, else init.ops = &clk_divider_ops; init.flags = flags; - init.parent_names = (parent_name ? &parent_name: NULL); - init.num_parents = (parent_name ? 1 : 0); + init.parent_names = parent_name ? &parent_name : NULL; + init.parent_hws = parent_hw ? &parent_hw : NULL; + init.parent_data = parent_data; + if (parent_name || parent_hw || parent_data) + init.num_parents = 1; + else + init.num_parents = 0; /* struct clk_divider assignments */ div->reg = reg; diff --git a/drivers/clk/meson/clk-pll.c b/drivers/clk/meson/clk-pll.c index b17a13e9337c..49f27fe53213 100644 --- a/drivers/clk/meson/clk-pll.c +++ b/drivers/clk/meson/clk-pll.c @@ -365,13 +365,14 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate, { struct clk_regmap *clk = to_clk_regmap(hw); struct meson_clk_pll_data *pll = meson_clk_pll_data(clk); - unsigned int enabled, m, n, frac = 0, ret; + unsigned int enabled, m, n, frac = 0; unsigned long old_rate; + int ret; if (parent_rate == 0 || rate == 0) return -EINVAL; - old_rate = rate; + old_rate = clk_hw_get_rate(hw); ret = meson_clk_get_pll_settings(rate, parent_rate, &m, &n, pll); if (ret) @@ -393,7 +394,8 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate, if (!enabled) return 0; - if (meson_clk_pll_enable(hw)) { + ret = meson_clk_pll_enable(hw); + if (ret) { pr_warn("%s: pll did not lock, trying to restore old rate %lu\n", __func__, old_rate); /* @@ -405,7 +407,7 @@ static int meson_clk_pll_set_rate(struct clk_hw *hw, unsigned long rate, meson_clk_pll_set_rate(hw, old_rate, parent_rate); } - return 0; + return ret; } /* diff --git a/drivers/clk/qcom/gcc-msm8998.c b/drivers/clk/qcom/gcc-msm8998.c index 9d7016bcd680..b8dcfe62312b 100644 --- a/drivers/clk/qcom/gcc-msm8998.c +++ b/drivers/clk/qcom/gcc-msm8998.c @@ -135,7 +135,7 @@ static struct pll_vco fabia_vco[] = { static struct clk_alpha_pll gpll0 = { .offset = 0x0, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .vco_table = fabia_vco, .num_vco = ARRAY_SIZE(fabia_vco), .clkr = { @@ -145,58 +145,58 @@ static struct clk_alpha_pll gpll0 = { .name = "gpll0", .parent_names = (const char *[]){ "xo" }, .num_parents = 1, - .ops = &clk_alpha_pll_ops, + .ops = &clk_alpha_pll_fixed_fabia_ops, } }, }; static struct clk_alpha_pll_postdiv gpll0_out_even = { .offset = 0x0, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll0_out_even", .parent_names = (const char *[]){ "gpll0" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll0_out_main = { .offset = 0x0, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll0_out_main", .parent_names = (const char *[]){ "gpll0" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll0_out_odd = { .offset = 0x0, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll0_out_odd", .parent_names = (const char *[]){ "gpll0" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll0_out_test = { .offset = 0x0, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll0_out_test", .parent_names = (const char *[]){ "gpll0" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll gpll1 = { .offset = 0x1000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .vco_table = fabia_vco, .num_vco = ARRAY_SIZE(fabia_vco), .clkr = { @@ -206,58 +206,58 @@ static struct clk_alpha_pll gpll1 = { .name = "gpll1", .parent_names = (const char *[]){ "xo" }, .num_parents = 1, - .ops = &clk_alpha_pll_ops, + .ops = &clk_alpha_pll_fixed_fabia_ops, } }, }; static struct clk_alpha_pll_postdiv gpll1_out_even = { .offset = 0x1000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll1_out_even", .parent_names = (const char *[]){ "gpll1" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll1_out_main = { .offset = 0x1000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll1_out_main", .parent_names = (const char *[]){ "gpll1" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll1_out_odd = { .offset = 0x1000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll1_out_odd", .parent_names = (const char *[]){ "gpll1" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll1_out_test = { .offset = 0x1000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll1_out_test", .parent_names = (const char *[]){ "gpll1" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll gpll2 = { .offset = 0x2000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .vco_table = fabia_vco, .num_vco = ARRAY_SIZE(fabia_vco), .clkr = { @@ -267,58 +267,58 @@ static struct clk_alpha_pll gpll2 = { .name = "gpll2", .parent_names = (const char *[]){ "xo" }, .num_parents = 1, - .ops = &clk_alpha_pll_ops, + .ops = &clk_alpha_pll_fixed_fabia_ops, } }, }; static struct clk_alpha_pll_postdiv gpll2_out_even = { .offset = 0x2000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll2_out_even", .parent_names = (const char *[]){ "gpll2" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll2_out_main = { .offset = 0x2000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll2_out_main", .parent_names = (const char *[]){ "gpll2" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll2_out_odd = { .offset = 0x2000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll2_out_odd", .parent_names = (const char *[]){ "gpll2" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll2_out_test = { .offset = 0x2000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll2_out_test", .parent_names = (const char *[]){ "gpll2" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll gpll3 = { .offset = 0x3000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .vco_table = fabia_vco, .num_vco = ARRAY_SIZE(fabia_vco), .clkr = { @@ -328,58 +328,58 @@ static struct clk_alpha_pll gpll3 = { .name = "gpll3", .parent_names = (const char *[]){ "xo" }, .num_parents = 1, - .ops = &clk_alpha_pll_ops, + .ops = &clk_alpha_pll_fixed_fabia_ops, } }, }; static struct clk_alpha_pll_postdiv gpll3_out_even = { .offset = 0x3000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll3_out_even", .parent_names = (const char *[]){ "gpll3" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll3_out_main = { .offset = 0x3000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll3_out_main", .parent_names = (const char *[]){ "gpll3" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll3_out_odd = { .offset = 0x3000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll3_out_odd", .parent_names = (const char *[]){ "gpll3" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll3_out_test = { .offset = 0x3000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll3_out_test", .parent_names = (const char *[]){ "gpll3" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll gpll4 = { .offset = 0x77000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .vco_table = fabia_vco, .num_vco = ARRAY_SIZE(fabia_vco), .clkr = { @@ -389,52 +389,52 @@ static struct clk_alpha_pll gpll4 = { .name = "gpll4", .parent_names = (const char *[]){ "xo" }, .num_parents = 1, - .ops = &clk_alpha_pll_ops, + .ops = &clk_alpha_pll_fixed_fabia_ops, } }, }; static struct clk_alpha_pll_postdiv gpll4_out_even = { .offset = 0x77000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4_out_even", .parent_names = (const char *[]){ "gpll4" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll4_out_main = { .offset = 0x77000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4_out_main", .parent_names = (const char *[]){ "gpll4" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll4_out_odd = { .offset = 0x77000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4_out_odd", .parent_names = (const char *[]){ "gpll4" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; static struct clk_alpha_pll_postdiv gpll4_out_test = { .offset = 0x77000, - .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_DEFAULT], + .regs = clk_alpha_pll_regs[CLK_ALPHA_PLL_TYPE_FABIA], .clkr.hw.init = &(struct clk_init_data){ .name = "gpll4_out_test", .parent_names = (const char *[]){ "gpll4" }, .num_parents = 1, - .ops = &clk_alpha_pll_postdiv_ops, + .ops = &clk_alpha_pll_postdiv_fabia_ops, }, }; diff --git a/drivers/clk/qcom/gcc-sc7180.c b/drivers/clk/qcom/gcc-sc7180.c index b05901b24917..da8b627ca156 100644 --- a/drivers/clk/qcom/gcc-sc7180.c +++ b/drivers/clk/qcom/gcc-sc7180.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved. + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved. */ #include @@ -620,7 +620,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = { .name = "gcc_sdcc1_apps_clk_src", .parent_data = gcc_parent_data_1, .num_parents = 5, - .ops = &clk_rcg2_ops, + .ops = &clk_rcg2_floor_ops, }, }; @@ -642,7 +642,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = { .name = "gcc_sdcc1_ice_core_clk_src", .parent_data = gcc_parent_data_0, .num_parents = 4, - .ops = &clk_rcg2_floor_ops, + .ops = &clk_rcg2_ops, }, }; @@ -919,19 +919,6 @@ static struct clk_branch gcc_camera_throttle_hf_axi_clk = { }, }; -static struct clk_branch gcc_camera_xo_clk = { - .halt_reg = 0xb02c, - .halt_check = BRANCH_HALT, - .clkr = { - .enable_reg = 0xb02c, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_camera_xo_clk", - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_ce1_ahb_clk = { .halt_reg = 0x4100c, .halt_check = BRANCH_HALT_VOTED, @@ -1096,19 +1083,6 @@ static struct clk_branch gcc_disp_throttle_hf_axi_clk = { }, }; -static struct clk_branch gcc_disp_xo_clk = { - .halt_reg = 0xb030, - .halt_check = BRANCH_HALT, - .clkr = { - .enable_reg = 0xb030, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_disp_xo_clk", - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_gp1_clk = { .halt_reg = 0x64000, .halt_check = BRANCH_HALT, @@ -2159,19 +2133,6 @@ static struct clk_branch gcc_video_throttle_axi_clk = { }, }; -static struct clk_branch gcc_video_xo_clk = { - .halt_reg = 0xb028, - .halt_check = BRANCH_HALT, - .clkr = { - .enable_reg = 0xb028, - .enable_mask = BIT(0), - .hw.init = &(struct clk_init_data){ - .name = "gcc_video_xo_clk", - .ops = &clk_branch2_ops, - }, - }, -}; - static struct clk_branch gcc_mss_cfg_ahb_clk = { .halt_reg = 0x8a000, .halt_check = BRANCH_HALT, @@ -2304,7 +2265,6 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_BOOT_ROM_AHB_CLK] = &gcc_boot_rom_ahb_clk.clkr, [GCC_CAMERA_HF_AXI_CLK] = &gcc_camera_hf_axi_clk.clkr, [GCC_CAMERA_THROTTLE_HF_AXI_CLK] = &gcc_camera_throttle_hf_axi_clk.clkr, - [GCC_CAMERA_XO_CLK] = &gcc_camera_xo_clk.clkr, [GCC_CE1_AHB_CLK] = &gcc_ce1_ahb_clk.clkr, [GCC_CE1_AXI_CLK] = &gcc_ce1_axi_clk.clkr, [GCC_CE1_CLK] = &gcc_ce1_clk.clkr, @@ -2317,7 +2277,6 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_DISP_GPLL0_DIV_CLK_SRC] = &gcc_disp_gpll0_div_clk_src.clkr, [GCC_DISP_HF_AXI_CLK] = &gcc_disp_hf_axi_clk.clkr, [GCC_DISP_THROTTLE_HF_AXI_CLK] = &gcc_disp_throttle_hf_axi_clk.clkr, - [GCC_DISP_XO_CLK] = &gcc_disp_xo_clk.clkr, [GCC_GP1_CLK] = &gcc_gp1_clk.clkr, [GCC_GP1_CLK_SRC] = &gcc_gp1_clk_src.clkr, [GCC_GP2_CLK] = &gcc_gp2_clk.clkr, @@ -2413,7 +2372,6 @@ static struct clk_regmap *gcc_sc7180_clocks[] = { [GCC_VIDEO_AXI_CLK] = &gcc_video_axi_clk.clkr, [GCC_VIDEO_GPLL0_DIV_CLK_SRC] = &gcc_video_gpll0_div_clk_src.clkr, [GCC_VIDEO_THROTTLE_AXI_CLK] = &gcc_video_throttle_axi_clk.clkr, - [GCC_VIDEO_XO_CLK] = &gcc_video_xo_clk.clkr, [GPLL0] = &gpll0.clkr, [GPLL0_OUT_EVEN] = &gpll0_out_even.clkr, [GPLL6] = &gpll6.clkr, @@ -2510,6 +2468,9 @@ static int gcc_sc7180_probe(struct platform_device *pdev) regmap_update_bits(regmap, 0x0b004, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x0b008, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x0b00c, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x0b02c, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x0b028, BIT(0), BIT(0)); + regmap_update_bits(regmap, 0x0b030, BIT(0), BIT(0)); regmap_update_bits(regmap, 0x71004, BIT(0), BIT(0)); ret = qcom_cc_register_rcg_dfs(regmap, gcc_dfs_clocks, diff --git a/drivers/clk/qcom/gdsc.c b/drivers/clk/qcom/gdsc.c index af26e0695b86..51ed640e527b 100644 --- a/drivers/clk/qcom/gdsc.c +++ b/drivers/clk/qcom/gdsc.c @@ -183,7 +183,10 @@ static inline int gdsc_assert_reset(struct gdsc *sc) static inline void gdsc_force_mem_on(struct gdsc *sc) { int i; - u32 mask = RETAIN_MEM | RETAIN_PERIPH; + u32 mask = RETAIN_MEM; + + if (!(sc->flags & NO_RET_PERIPH)) + mask |= RETAIN_PERIPH; for (i = 0; i < sc->cxc_count; i++) regmap_update_bits(sc->regmap, sc->cxcs[i], mask, mask); @@ -192,7 +195,10 @@ static inline void gdsc_force_mem_on(struct gdsc *sc) static inline void gdsc_clear_mem_on(struct gdsc *sc) { int i; - u32 mask = RETAIN_MEM | RETAIN_PERIPH; + u32 mask = RETAIN_MEM; + + if (!(sc->flags & NO_RET_PERIPH)) + mask |= RETAIN_PERIPH; for (i = 0; i < sc->cxc_count; i++) regmap_update_bits(sc->regmap, sc->cxcs[i], mask, 0); diff --git a/drivers/clk/qcom/gdsc.h b/drivers/clk/qcom/gdsc.h index bd537438c793..5bb396b344d1 100644 --- a/drivers/clk/qcom/gdsc.h +++ b/drivers/clk/qcom/gdsc.h @@ -42,7 +42,7 @@ struct gdsc { #define PWRSTS_ON BIT(2) #define PWRSTS_OFF_ON (PWRSTS_OFF | PWRSTS_ON) #define PWRSTS_RET_ON (PWRSTS_RET | PWRSTS_ON) - const u8 flags; + const u16 flags; #define VOTABLE BIT(0) #define CLAMP_IO BIT(1) #define HW_CTRL BIT(2) @@ -51,6 +51,7 @@ struct gdsc { #define POLL_CFG_GDSCR BIT(5) #define ALWAYS_ON BIT(6) #define RETAIN_FF_ENABLE BIT(7) +#define NO_RET_PERIPH BIT(8) struct reset_controller_dev *rcdev; unsigned int *resets; unsigned int reset_count; diff --git a/drivers/clk/qcom/gpucc-msm8998.c b/drivers/clk/qcom/gpucc-msm8998.c index 9b3923af02a1..1a518c4915b4 100644 --- a/drivers/clk/qcom/gpucc-msm8998.c +++ b/drivers/clk/qcom/gpucc-msm8998.c @@ -253,12 +253,16 @@ static struct gdsc gpu_cx_gdsc = { static struct gdsc gpu_gx_gdsc = { .gdscr = 0x1094, .clamp_io_ctrl = 0x130, + .resets = (unsigned int []){ GPU_GX_BCR }, + .reset_count = 1, + .cxcs = (unsigned int []){ 0x1098 }, + .cxc_count = 1, .pd = { .name = "gpu_gx", }, .parent = &gpu_cx_gdsc.pd, - .pwrsts = PWRSTS_OFF_ON, - .flags = CLAMP_IO | AON_RESET, + .pwrsts = PWRSTS_OFF_ON | PWRSTS_RET, + .flags = CLAMP_IO | SW_RESET | AON_RESET | NO_RET_PERIPH, }; static struct clk_regmap *gpucc_msm8998_clocks[] = { diff --git a/drivers/clk/qcom/lpass-gfm-sm8250.c b/drivers/clk/qcom/lpass-gfm-sm8250.c index d366c7c2abc7..f5e31e692b9b 100644 --- a/drivers/clk/qcom/lpass-gfm-sm8250.c +++ b/drivers/clk/qcom/lpass-gfm-sm8250.c @@ -33,14 +33,13 @@ struct clk_gfm { void __iomem *gfm_mux; }; -#define GFM_MASK BIT(1) #define to_clk_gfm(_hw) container_of(_hw, struct clk_gfm, hw) static u8 clk_gfm_get_parent(struct clk_hw *hw) { struct clk_gfm *clk = to_clk_gfm(hw); - return readl(clk->gfm_mux) & GFM_MASK; + return readl(clk->gfm_mux) & clk->mux_mask; } static int clk_gfm_set_parent(struct clk_hw *hw, u8 index) @@ -51,9 +50,10 @@ static int clk_gfm_set_parent(struct clk_hw *hw, u8 index) val = readl(clk->gfm_mux); if (index) - val |= GFM_MASK; + val |= clk->mux_mask; else - val &= ~GFM_MASK; + val &= ~clk->mux_mask; + writel(val, clk->gfm_mux); diff --git a/drivers/clk/renesas/r8a779a0-cpg-mssr.c b/drivers/clk/renesas/r8a779a0-cpg-mssr.c index aa5389b04d74..7b2c640c3de0 100644 --- a/drivers/clk/renesas/r8a779a0-cpg-mssr.c +++ b/drivers/clk/renesas/r8a779a0-cpg-mssr.c @@ -69,7 +69,6 @@ enum clk_ids { CLK_PLL5_DIV2, CLK_PLL5_DIV4, CLK_S1, - CLK_S2, CLK_S3, CLK_SDSRC, CLK_RPCSRC, @@ -137,7 +136,7 @@ static const struct cpg_core_clk r8a779a0_core_clks[] __initconst = { DEF_FIXED("icu", R8A779A0_CLK_ICU, CLK_PLL5_DIV4, 2, 1), DEF_FIXED("icud2", R8A779A0_CLK_ICUD2, CLK_PLL5_DIV4, 4, 1), DEF_FIXED("vcbus", R8A779A0_CLK_VCBUS, CLK_PLL5_DIV4, 1, 1), - DEF_FIXED("cbfusa", R8A779A0_CLK_CBFUSA, CLK_MAIN, 2, 1), + DEF_FIXED("cbfusa", R8A779A0_CLK_CBFUSA, CLK_EXTAL, 2, 1), DEF_DIV6P1("mso", R8A779A0_CLK_MSO, CLK_PLL5_DIV4, 0x87c), DEF_DIV6P1("canfd", R8A779A0_CLK_CANFD, CLK_PLL5_DIV4, 0x878), diff --git a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c index f2497d0a4683..bff446b78290 100644 --- a/drivers/clk/sunxi-ng/ccu-sun50i-h6.c +++ b/drivers/clk/sunxi-ng/ccu-sun50i-h6.c @@ -237,7 +237,7 @@ static const char * const psi_ahb1_ahb2_parents[] = { "osc24M", "osc32k", static SUNXI_CCU_MP_WITH_MUX(psi_ahb1_ahb2_clk, "psi-ahb1-ahb2", psi_ahb1_ahb2_parents, 0x510, - 0, 5, /* M */ + 0, 2, /* M */ 8, 2, /* P */ 24, 2, /* mux */ 0); @@ -246,19 +246,19 @@ static const char * const ahb3_apb1_apb2_parents[] = { "osc24M", "osc32k", "psi-ahb1-ahb2", "pll-periph0" }; static SUNXI_CCU_MP_WITH_MUX(ahb3_clk, "ahb3", ahb3_apb1_apb2_parents, 0x51c, - 0, 5, /* M */ + 0, 2, /* M */ 8, 2, /* P */ 24, 2, /* mux */ 0); static SUNXI_CCU_MP_WITH_MUX(apb1_clk, "apb1", ahb3_apb1_apb2_parents, 0x520, - 0, 5, /* M */ + 0, 2, /* M */ 8, 2, /* P */ 24, 2, /* mux */ 0); static SUNXI_CCU_MP_WITH_MUX(apb2_clk, "apb2", ahb3_apb1_apb2_parents, 0x524, - 0, 5, /* M */ + 0, 2, /* M */ 8, 2, /* P */ 24, 2, /* mux */ 0); @@ -682,7 +682,7 @@ static struct ccu_mux hdmi_cec_clk = { .common = { .reg = 0xb10, - .features = CCU_FEATURE_VARIABLE_PREDIV, + .features = CCU_FEATURE_FIXED_PREDIV, .hw.init = CLK_HW_INIT_PARENTS("hdmi-cec", hdmi_cec_parents, &ccu_mux_ops, diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 14c7c4712478..66be9ea69e33 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -79,6 +79,7 @@ config IXP4XX_TIMER bool "Intel XScale IXP4xx timer driver" if COMPILE_TEST depends on HAS_IOMEM select CLKSRC_MMIO + select TIMER_OF if OF help Enables support for the Intel XScale IXP4xx SoC timer. diff --git a/drivers/clocksource/mxs_timer.c b/drivers/clocksource/mxs_timer.c index bc96a4cbf26c..e52e12d27d2a 100644 --- a/drivers/clocksource/mxs_timer.c +++ b/drivers/clocksource/mxs_timer.c @@ -131,10 +131,7 @@ static void mxs_irq_clear(char *state) /* Clear pending interrupt */ timrot_irq_acknowledge(); - -#ifdef DEBUG - pr_info("%s: changing mode to %s\n", __func__, state) -#endif /* DEBUG */ + pr_debug("%s: changing mode to %s\n", __func__, state); } static int mxs_shutdown(struct clock_event_device *evt) diff --git a/drivers/counter/stm32-timer-cnt.c b/drivers/counter/stm32-timer-cnt.c index ef2a974a2f10..75bc401fdd18 100644 --- a/drivers/counter/stm32-timer-cnt.c +++ b/drivers/counter/stm32-timer-cnt.c @@ -31,7 +31,7 @@ struct stm32_timer_cnt { struct counter_device counter; struct regmap *regmap; struct clk *clk; - u32 ceiling; + u32 max_arr; bool enabled; struct stm32_timer_regs bak; }; @@ -44,13 +44,14 @@ struct stm32_timer_cnt { * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges */ enum stm32_count_function { - STM32_COUNT_SLAVE_MODE_DISABLED = -1, + STM32_COUNT_SLAVE_MODE_DISABLED, STM32_COUNT_ENCODER_MODE_1, STM32_COUNT_ENCODER_MODE_2, STM32_COUNT_ENCODER_MODE_3, }; static enum counter_count_function stm32_count_functions[] = { + [STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_COUNT_FUNCTION_INCREASE, [STM32_COUNT_ENCODER_MODE_1] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_A, [STM32_COUNT_ENCODER_MODE_2] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_B, [STM32_COUNT_ENCODER_MODE_3] = COUNTER_COUNT_FUNCTION_QUADRATURE_X4, @@ -73,8 +74,10 @@ static int stm32_count_write(struct counter_device *counter, const unsigned long val) { struct stm32_timer_cnt *const priv = counter->priv; + u32 ceiling; - if (val > priv->ceiling) + regmap_read(priv->regmap, TIM_ARR, &ceiling); + if (val > ceiling) return -EINVAL; return regmap_write(priv->regmap, TIM_CNT, val); @@ -90,6 +93,9 @@ static int stm32_count_function_get(struct counter_device *counter, regmap_read(priv->regmap, TIM_SMCR, &smcr); switch (smcr & TIM_SMCR_SMS) { + case 0: + *function = STM32_COUNT_SLAVE_MODE_DISABLED; + return 0; case 1: *function = STM32_COUNT_ENCODER_MODE_1; return 0; @@ -99,9 +105,9 @@ static int stm32_count_function_get(struct counter_device *counter, case 3: *function = STM32_COUNT_ENCODER_MODE_3; return 0; + default: + return -EINVAL; } - - return -EINVAL; } static int stm32_count_function_set(struct counter_device *counter, @@ -112,6 +118,9 @@ static int stm32_count_function_set(struct counter_device *counter, u32 cr1, sms; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + sms = 0; + break; case STM32_COUNT_ENCODER_MODE_1: sms = 1; break; @@ -122,8 +131,7 @@ static int stm32_count_function_set(struct counter_device *counter, sms = 3; break; default: - sms = 0; - break; + return -EINVAL; } /* Store enable status */ @@ -131,10 +139,6 @@ static int stm32_count_function_set(struct counter_device *counter, regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0); - /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ - regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); - regmap_write(priv->regmap, TIM_ARR, priv->ceiling); - regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms); /* Make sure that registers are updated */ @@ -185,11 +189,13 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter, if (ret) return ret; + if (ceiling > priv->max_arr) + return -ERANGE; + /* TIMx_ARR register shouldn't be buffered (ARPE=0) */ regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0); regmap_write(priv->regmap, TIM_ARR, ceiling); - priv->ceiling = ceiling; return len; } @@ -274,31 +280,36 @@ static int stm32_action_get(struct counter_device *counter, size_t function; int err; - /* Default action mode (e.g. STM32_COUNT_SLAVE_MODE_DISABLED) */ - *action = STM32_SYNAPSE_ACTION_NONE; - err = stm32_count_function_get(counter, count, &function); if (err) - return 0; + return err; switch (function) { + case STM32_COUNT_SLAVE_MODE_DISABLED: + /* counts on internal clock when CEN=1 */ + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_1: /* counts up/down on TI1FP1 edge depending on TI2FP2 level */ if (synapse->signal->id == count->synapses[0].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_2: /* counts up/down on TI2FP2 edge depending on TI1FP1 level */ if (synapse->signal->id == count->synapses[1].signal->id) *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + else + *action = STM32_SYNAPSE_ACTION_NONE; + return 0; case STM32_COUNT_ENCODER_MODE_3: /* counts up/down on both TI1FP1 and TI2FP2 edges */ *action = STM32_SYNAPSE_ACTION_BOTH_EDGES; - break; + return 0; + default: + return -EINVAL; } - - return 0; } static const struct counter_ops stm32_timer_cnt_ops = { @@ -359,7 +370,7 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev) priv->regmap = ddata->regmap; priv->clk = ddata->clk; - priv->ceiling = ddata->max_arr; + priv->max_arr = ddata->max_arr; priv->counter.name = dev_name(dev); priv->counter.parent = dev; diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index d3e5a6fceb61..d1bbc16fba4b 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -54,7 +54,6 @@ struct acpi_cpufreq_data { unsigned int resume; unsigned int cpu_feature; unsigned int acpi_perf_cpu; - unsigned int first_perf_state; cpumask_var_t freqdomain_cpus; void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val); u32 (*cpu_freq_read)(struct acpi_pct_register *reg); @@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr) perf = to_perf_data(data); - cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state) + cpufreq_for_each_entry(pos, policy->freq_table) if (msr == perf->states[pos->driver_data].status) return pos->frequency; - return policy->freq_table[data->first_perf_state].frequency; + return policy->freq_table[0].frequency; } static unsigned extract_freq(struct cpufreq_policy *policy, u32 val) @@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; - unsigned int state; pr_debug("%s (%d)\n", __func__, cpu); @@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu) if (unlikely(!data || !policy->freq_table)) return 0; - state = to_perf_data(data)->state; - if (state < data->first_perf_state) - state = data->first_perf_state; - - cached_freq = policy->freq_table[state].frequency; + cached_freq = policy->freq_table[to_perf_data(data)->state].frequency; freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data)); if (freq != cached_freq) { /* @@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) struct cpuinfo_x86 *c = &cpu_data(cpu); unsigned int valid_states = 0; unsigned int result = 0; - unsigned int state_count; u64 max_boost_ratio; unsigned int i; #ifdef CONFIG_SMP @@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_unreg; } - state_count = perf->state_count + 1; - - max_boost_ratio = get_max_boost_ratio(cpu); - if (max_boost_ratio) { - /* - * Make a room for one more entry to represent the highest - * available "boost" frequency. - */ - state_count++; - valid_states++; - data->first_perf_state = valid_states; - } else { - /* - * If the maximum "boost" frequency is unknown, ask the arch - * scale-invariance code to use the "nominal" performance for - * CPU utilization scaling so as to prevent the schedutil - * governor from selecting inadequate CPU frequencies. - */ - arch_set_max_freq_ratio(true); - } - - freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL); + freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table), + GFP_KERNEL); if (!freq_table) { result = -ENOMEM; goto err_unreg; @@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) } freq_table[valid_states].frequency = CPUFREQ_TABLE_END; + max_boost_ratio = get_max_boost_ratio(cpu); if (max_boost_ratio) { - unsigned int state = data->first_perf_state; - unsigned int freq = freq_table[state].frequency; + unsigned int freq = freq_table[0].frequency; /* * Because the loop above sorts the freq_table entries in the * descending order, freq is the maximum frequency in the table. * Assume that it corresponds to the CPPC nominal frequency and - * use it to populate the frequency field of the extra "boost" - * frequency entry. + * use it to set cpuinfo.max_freq. */ - freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; + policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT; + } else { /* - * The purpose of the extra "boost" frequency entry is to make - * the rest of cpufreq aware of the real maximum frequency, but - * the way to request it is the same as for the first_perf_state - * entry that is expected to cover the entire range of "boost" - * frequencies of the CPU, so copy the driver_data value from - * that entry. + * If the maximum "boost" frequency is unknown, ask the arch + * scale-invariance code to use the "nominal" performance for + * CPU utilization scaling so as to prevent the schedutil + * governor from selecting inadequate CPU frequencies. */ - freq_table[0].driver_data = freq_table[state].driver_data; + arch_set_max_freq_ratio(true); } policy->freq_table = freq_table; @@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy) { struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data, policy->cpu); - struct acpi_cpufreq_data *data = policy->driver_data; - unsigned int freq = policy->freq_table[data->first_perf_state].frequency; + unsigned int freq = policy->freq_table[0].frequency; if (perf->states[0].core_frequency * 1000 != freq) pr_warn(FW_WARN "P-state 0 is not max freq\n"); diff --git a/drivers/cpufreq/brcmstb-avs-cpufreq.c b/drivers/cpufreq/brcmstb-avs-cpufreq.c index 3e31e5d28b79..4153150e20db 100644 --- a/drivers/cpufreq/brcmstb-avs-cpufreq.c +++ b/drivers/cpufreq/brcmstb-avs-cpufreq.c @@ -597,6 +597,16 @@ static int brcm_avs_prepare_init(struct platform_device *pdev) return ret; } +static void brcm_avs_prepare_uninit(struct platform_device *pdev) +{ + struct private_data *priv; + + priv = platform_get_drvdata(pdev); + + iounmap(priv->avs_intr_base); + iounmap(priv->base); +} + static int brcm_avs_cpufreq_init(struct cpufreq_policy *policy) { struct cpufreq_frequency_table *freq_table; @@ -732,21 +742,21 @@ static int brcm_avs_cpufreq_probe(struct platform_device *pdev) brcm_avs_driver.driver_data = pdev; - return cpufreq_register_driver(&brcm_avs_driver); + ret = cpufreq_register_driver(&brcm_avs_driver); + if (ret) + brcm_avs_prepare_uninit(pdev); + + return ret; } static int brcm_avs_cpufreq_remove(struct platform_device *pdev) { - struct private_data *priv; int ret; ret = cpufreq_unregister_driver(&brcm_avs_driver); - if (ret) - return ret; + WARN_ON(ret); - priv = platform_get_drvdata(pdev); - iounmap(priv->base); - iounmap(priv->avs_intr_base); + brcm_avs_prepare_uninit(pdev); return 0; } diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c index bd2db0188cbb..91e6a0c10dbf 100644 --- a/drivers/cpufreq/cpufreq-dt-platdev.c +++ b/drivers/cpufreq/cpufreq-dt-platdev.c @@ -103,6 +103,8 @@ static const struct of_device_id whitelist[] __initconst = { static const struct of_device_id blacklist[] __initconst = { { .compatible = "allwinner,sun50i-h6", }, + { .compatible = "arm,vexpress", }, + { .compatible = "calxeda,highbank", }, { .compatible = "calxeda,ecx-2000", }, diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c index f839dc9852c0..d3f756f7b5a0 100644 --- a/drivers/cpufreq/freq_table.c +++ b/drivers/cpufreq/freq_table.c @@ -52,7 +52,13 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, } policy->min = policy->cpuinfo.min_freq = min_freq; - policy->max = policy->cpuinfo.max_freq = max_freq; + policy->max = max_freq; + /* + * If the driver has set its own cpuinfo.max_freq above max_freq, leave + * it as is. + */ + if (policy->cpuinfo.max_freq < max_freq) + policy->max = policy->cpuinfo.max_freq = max_freq; if (policy->min == ~0) return -EINVAL; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index be05e038d956..c4d8a5126d61 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -819,13 +819,13 @@ static struct freq_attr *hwp_cpufreq_attrs[] = { NULL, }; -static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max, +static void intel_pstate_get_hwp_max(struct cpudata *cpu, int *phy_max, int *current_max) { u64 cap; - rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap); - WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap); + rdmsrl_on_cpu(cpu->cpu, MSR_HWP_CAPABILITIES, &cap); + WRITE_ONCE(cpu->hwp_cap_cached, cap); if (global.no_turbo || global.turbo_disabled) *current_max = HWP_GUARANTEED_PERF(cap); else @@ -1213,7 +1213,7 @@ static void update_qos_request(enum freq_qos_req_type type) continue; if (hwp_active) - intel_pstate_get_hwp_max(i, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); else turbo_max = cpu->pstate.turbo_pstate; @@ -1714,21 +1714,22 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu) static void intel_pstate_get_cpu_pstates(struct cpudata *cpu) { cpu->pstate.min_pstate = pstate_funcs.get_min(); - cpu->pstate.max_pstate = pstate_funcs.get_max(); cpu->pstate.max_pstate_physical = pstate_funcs.get_max_physical(); cpu->pstate.turbo_pstate = pstate_funcs.get_turbo(); cpu->pstate.scaling = pstate_funcs.get_scaling(); - cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; if (hwp_active && !hwp_mode_bdw) { unsigned int phy_max, current_max; - intel_pstate_get_hwp_max(cpu->cpu, &phy_max, ¤t_max); + intel_pstate_get_hwp_max(cpu, &phy_max, ¤t_max); cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling; cpu->pstate.turbo_pstate = phy_max; + cpu->pstate.max_pstate = HWP_GUARANTEED_PERF(READ_ONCE(cpu->hwp_cap_cached)); } else { cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling; + cpu->pstate.max_pstate = pstate_funcs.get_max(); } + cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling; if (pstate_funcs.get_aperf_mperf_shift) cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift(); @@ -2207,7 +2208,7 @@ static void intel_pstate_update_perf_limits(struct cpudata *cpu, * rather than pure ratios. */ if (hwp_active) { - intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); } else { max_state = global.no_turbo || global.turbo_disabled ? cpu->pstate.max_pstate : cpu->pstate.turbo_pstate; @@ -2322,7 +2323,7 @@ static void intel_pstate_verify_cpu_policy(struct cpudata *cpu, if (hwp_active) { int max_state, turbo_max; - intel_pstate_get_hwp_max(cpu->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); max_freq = max_state * cpu->pstate.scaling; } else { max_freq = intel_pstate_get_max_freq(cpu); @@ -2709,7 +2710,7 @@ static int intel_cpufreq_cpu_init(struct cpufreq_policy *policy) if (hwp_active) { u64 value; - intel_pstate_get_hwp_max(policy->cpu, &turbo_max, &max_state); + intel_pstate_get_hwp_max(cpu, &turbo_max, &max_state); policy->transition_delay_us = INTEL_CPUFREQ_TRANSITION_DELAY_HWP; rdmsrl_on_cpu(cpu->cpu, MSR_HWP_REQUEST, &value); WRITE_ONCE(cpu->hwp_req_cached, value); diff --git a/drivers/cpufreq/qcom-cpufreq-hw.c b/drivers/cpufreq/qcom-cpufreq-hw.c index 9ed5341dc515..6de07556665b 100644 --- a/drivers/cpufreq/qcom-cpufreq-hw.c +++ b/drivers/cpufreq/qcom-cpufreq-hw.c @@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data { struct qcom_cpufreq_data { void __iomem *base; + struct resource *res; const struct qcom_cpufreq_soc_data *soc_data; }; @@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) struct of_phandle_args args; struct device_node *cpu_np; struct device *cpu_dev; + struct resource *res; void __iomem *base; struct qcom_cpufreq_data *data; int ret, index; @@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) index = args.args[0]; - base = devm_platform_ioremap_resource(pdev, index); - if (IS_ERR(base)) - return PTR_ERR(base); + res = platform_get_resource(pdev, IORESOURCE_MEM, index); + if (!res) { + dev_err(dev, "failed to get mem resource %d\n", index); + return -ENODEV; + } - data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL); + if (!request_mem_region(res->start, resource_size(res), res->name)) { + dev_err(dev, "failed to request resource %pR\n", res); + return -EBUSY; + } + + base = ioremap(res->start, resource_size(res)); + if (!base) { + dev_err(dev, "failed to map resource %pR\n", res); + ret = -ENOMEM; + goto release_region; + } + + data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) { ret = -ENOMEM; - goto error; + goto unmap_base; } data->soc_data = of_device_get_match_data(&pdev->dev); data->base = base; + data->res = res; /* HW should be in enabled state to proceed */ if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) { @@ -349,7 +366,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy) return 0; error: - devm_iounmap(dev, base); + kfree(data); +unmap_base: + iounmap(base); +release_region: + release_mem_region(res->start, resource_size(res)); return ret; } @@ -357,12 +378,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy) { struct device *cpu_dev = get_cpu_device(policy->cpu); struct qcom_cpufreq_data *data = policy->driver_data; - struct platform_device *pdev = cpufreq_get_driver_data(); + struct resource *res = data->res; + void __iomem *base = data->base; dev_pm_opp_remove_all_dynamic(cpu_dev); dev_pm_opp_of_cpumask_remove_table(policy->related_cpus); kfree(policy->freq_table); - devm_iounmap(&pdev->dev, data->base); + kfree(data); + iounmap(base); + release_mem_region(res->start, resource_size(res)); return 0; } diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c index b72de8939497..ffa628c89e21 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss-cipher.c @@ -20,6 +20,7 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct sun4i_cipher_req_ctx *ctx = skcipher_request_ctx(areq); u32 mode = ctx->mode; + void *backup_iv = NULL; /* when activating SS, the default FIFO space is SS_RX_DEFAULT(32) */ u32 rx_cnt = SS_RX_DEFAULT; u32 tx_cnt = 0; @@ -30,6 +31,8 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) unsigned int ileft = areq->cryptlen; unsigned int oleft = areq->cryptlen; unsigned int todo; + unsigned long pi = 0, po = 0; /* progress for in and out */ + bool miter_err; struct sg_mapping_iter mi, mo; unsigned int oi, oo; /* offset for in and out */ unsigned long flags; @@ -42,52 +45,71 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) return -EINVAL; } + if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { + backup_iv = kzalloc(ivsize, GFP_KERNEL); + if (!backup_iv) + return -ENOMEM; + scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + } + spin_lock_irqsave(&ss->slock, flags); - for (i = 0; i < op->keylen; i += 4) - writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); + for (i = 0; i < op->keylen / 4; i++) + writesl(ss->base + SS_KEY0 + i * 4, &op->key[i], 1); if (areq->iv) { for (i = 0; i < 4 && i < ivsize / 4; i++) { v = *(u32 *)(areq->iv + i * 4); - writel(v, ss->base + SS_IV0 + i * 4); + writesl(ss->base + SS_IV0 + i * 4, &v, 1); } } writel(mode, ss->base + SS_CTL); - sg_miter_start(&mi, areq->src, sg_nents(areq->src), - SG_MITER_FROM_SG | SG_MITER_ATOMIC); - sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), - SG_MITER_TO_SG | SG_MITER_ATOMIC); - sg_miter_next(&mi); - sg_miter_next(&mo); - if (!mi.addr || !mo.addr) { - dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); - err = -EINVAL; - goto release_ss; - } ileft = areq->cryptlen / 4; oleft = areq->cryptlen / 4; oi = 0; oo = 0; do { - todo = min(rx_cnt, ileft); - todo = min_t(size_t, todo, (mi.length - oi) / 4); - if (todo) { - ileft -= todo; - writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); - oi += todo * 4; - } - if (oi == mi.length) { - sg_miter_next(&mi); - oi = 0; + if (ileft) { + sg_miter_start(&mi, areq->src, sg_nents(areq->src), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + if (pi) + sg_miter_skip(&mi, pi); + miter_err = sg_miter_next(&mi); + if (!miter_err || !mi.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } + todo = min(rx_cnt, ileft); + todo = min_t(size_t, todo, (mi.length - oi) / 4); + if (todo) { + ileft -= todo; + writesl(ss->base + SS_RXFIFO, mi.addr + oi, todo); + oi += todo * 4; + } + if (oi == mi.length) { + pi += mi.length; + oi = 0; + } + sg_miter_stop(&mi); } spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); + sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), + SG_MITER_TO_SG | SG_MITER_ATOMIC); + if (po) + sg_miter_skip(&mo, po); + miter_err = sg_miter_next(&mo); + if (!miter_err || !mo.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } todo = min(tx_cnt, oleft); todo = min_t(size_t, todo, (mo.length - oo) / 4); if (todo) { @@ -96,21 +118,23 @@ static int noinline_for_stack sun4i_ss_opti_poll(struct skcipher_request *areq) oo += todo * 4; } if (oo == mo.length) { - sg_miter_next(&mo); oo = 0; + po += mo.length; } + sg_miter_stop(&mo); } while (oleft); if (areq->iv) { - for (i = 0; i < 4 && i < ivsize / 4; i++) { - v = readl(ss->base + SS_IV0 + i * 4); - *(u32 *)(areq->iv + i * 4) = v; + if (mode & SS_DECRYPTION) { + memcpy(areq->iv, backup_iv, ivsize); + kfree_sensitive(backup_iv); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, + ivsize, 0); } } release_ss: - sg_miter_stop(&mi); - sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); spin_unlock_irqrestore(&ss->slock, flags); return err; @@ -161,13 +185,16 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) unsigned int ileft = areq->cryptlen; unsigned int oleft = areq->cryptlen; unsigned int todo; + void *backup_iv = NULL; struct sg_mapping_iter mi, mo; + unsigned long pi = 0, po = 0; /* progress for in and out */ + bool miter_err; unsigned int oi, oo; /* offset for in and out */ unsigned int ob = 0; /* offset in buf */ unsigned int obo = 0; /* offset in bufo*/ unsigned int obl = 0; /* length of data in bufo */ unsigned long flags; - bool need_fallback; + bool need_fallback = false; if (!areq->cryptlen) return 0; @@ -186,12 +213,12 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) * we can use the SS optimized function */ while (in_sg && no_chunk == 1) { - if (in_sg->length % 4) + if ((in_sg->length | in_sg->offset) & 3u) no_chunk = 0; in_sg = sg_next(in_sg); } while (out_sg && no_chunk == 1) { - if (out_sg->length % 4) + if ((out_sg->length | out_sg->offset) & 3u) no_chunk = 0; out_sg = sg_next(out_sg); } @@ -202,30 +229,26 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) if (need_fallback) return sun4i_ss_cipher_poll_fallback(areq); + if (areq->iv && ivsize > 0 && mode & SS_DECRYPTION) { + backup_iv = kzalloc(ivsize, GFP_KERNEL); + if (!backup_iv) + return -ENOMEM; + scatterwalk_map_and_copy(backup_iv, areq->src, areq->cryptlen - ivsize, ivsize, 0); + } + spin_lock_irqsave(&ss->slock, flags); - for (i = 0; i < op->keylen; i += 4) - writel(*(op->key + i / 4), ss->base + SS_KEY0 + i); + for (i = 0; i < op->keylen / 4; i++) + writesl(ss->base + SS_KEY0 + i * 4, &op->key[i], 1); if (areq->iv) { for (i = 0; i < 4 && i < ivsize / 4; i++) { v = *(u32 *)(areq->iv + i * 4); - writel(v, ss->base + SS_IV0 + i * 4); + writesl(ss->base + SS_IV0 + i * 4, &v, 1); } } writel(mode, ss->base + SS_CTL); - sg_miter_start(&mi, areq->src, sg_nents(areq->src), - SG_MITER_FROM_SG | SG_MITER_ATOMIC); - sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), - SG_MITER_TO_SG | SG_MITER_ATOMIC); - sg_miter_next(&mi); - sg_miter_next(&mo); - if (!mi.addr || !mo.addr) { - dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); - err = -EINVAL; - goto release_ss; - } ileft = areq->cryptlen; oleft = areq->cryptlen; oi = 0; @@ -233,8 +256,16 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) while (oleft) { if (ileft) { - char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */ - + sg_miter_start(&mi, areq->src, sg_nents(areq->src), + SG_MITER_FROM_SG | SG_MITER_ATOMIC); + if (pi) + sg_miter_skip(&mi, pi); + miter_err = sg_miter_next(&mi); + if (!miter_err || !mi.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } /* * todo is the number of consecutive 4byte word that we * can read from current SG @@ -256,52 +287,57 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) */ todo = min(rx_cnt * 4 - ob, ileft); todo = min_t(size_t, todo, mi.length - oi); - memcpy(buf + ob, mi.addr + oi, todo); + memcpy(ss->buf + ob, mi.addr + oi, todo); ileft -= todo; oi += todo; ob += todo; if (!(ob % 4)) { - writesl(ss->base + SS_RXFIFO, buf, + writesl(ss->base + SS_RXFIFO, ss->buf, ob / 4); ob = 0; } } if (oi == mi.length) { - sg_miter_next(&mi); + pi += mi.length; oi = 0; } + sg_miter_stop(&mi); } spaces = readl(ss->base + SS_FCSR); rx_cnt = SS_RXFIFO_SPACES(spaces); tx_cnt = SS_TXFIFO_SPACES(spaces); - dev_dbg(ss->dev, - "%x %u/%zu %u/%u cnt=%u %u/%zu %u/%u cnt=%u %u\n", - mode, - oi, mi.length, ileft, areq->cryptlen, rx_cnt, - oo, mo.length, oleft, areq->cryptlen, tx_cnt, ob); if (!tx_cnt) continue; + sg_miter_start(&mo, areq->dst, sg_nents(areq->dst), + SG_MITER_TO_SG | SG_MITER_ATOMIC); + if (po) + sg_miter_skip(&mo, po); + miter_err = sg_miter_next(&mo); + if (!miter_err || !mo.addr) { + dev_err_ratelimited(ss->dev, "ERROR: sg_miter return null\n"); + err = -EINVAL; + goto release_ss; + } /* todo in 4bytes word */ todo = min(tx_cnt, oleft / 4); todo = min_t(size_t, todo, (mo.length - oo) / 4); + if (todo) { readsl(ss->base + SS_TXFIFO, mo.addr + oo, todo); oleft -= todo * 4; oo += todo * 4; if (oo == mo.length) { - sg_miter_next(&mo); + po += mo.length; oo = 0; } } else { - char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */ - /* * read obl bytes in bufo, we read at maximum for * emptying the device */ - readsl(ss->base + SS_TXFIFO, bufo, tx_cnt); + readsl(ss->base + SS_TXFIFO, ss->bufo, tx_cnt); obl = tx_cnt * 4; obo = 0; do { @@ -313,28 +349,31 @@ static int sun4i_ss_cipher_poll(struct skcipher_request *areq) */ todo = min_t(size_t, mo.length - oo, obl - obo); - memcpy(mo.addr + oo, bufo + obo, todo); + memcpy(mo.addr + oo, ss->bufo + obo, todo); oleft -= todo; obo += todo; oo += todo; if (oo == mo.length) { + po += mo.length; sg_miter_next(&mo); oo = 0; } } while (obo < obl); /* bufo must be fully used here */ } + sg_miter_stop(&mo); } if (areq->iv) { - for (i = 0; i < 4 && i < ivsize / 4; i++) { - v = readl(ss->base + SS_IV0 + i * 4); - *(u32 *)(areq->iv + i * 4) = v; + if (mode & SS_DECRYPTION) { + memcpy(areq->iv, backup_iv, ivsize); + kfree_sensitive(backup_iv); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, areq->cryptlen - ivsize, + ivsize, 0); } } release_ss: - sg_miter_stop(&mi); - sg_miter_stop(&mo); writel(0, ss->base + SS_CTL); spin_unlock_irqrestore(&ss->slock, flags); diff --git a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h index 5c291e4a6857..c242fccb2ab6 100644 --- a/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h +++ b/drivers/crypto/allwinner/sun4i-ss/sun4i-ss.h @@ -148,6 +148,8 @@ struct sun4i_ss_ctx { struct reset_control *reset; struct device *dev; struct resource *res; + char buf[4 * SS_RX_MAX];/* buffer for linearize SG src */ + char bufo[4 * SS_TX_MAX]; /* buffer for linearize SG dst */ spinlock_t slock; /* control the use of the device */ #ifdef CONFIG_CRYPTO_DEV_SUN4I_SS_PRNG u32 seed[SS_SEED_LEN / BITS_PER_LONG]; diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index 30390a7324b2..0e5537838ef3 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -42,7 +42,7 @@ /* ================= Device Structure ================== */ -struct device_private iproc_priv; +struct bcm_device_private iproc_priv; /* ==================== Parameters ===================== */ diff --git a/drivers/crypto/bcm/cipher.h b/drivers/crypto/bcm/cipher.h index 0ad5892b445d..71281a3bdbdc 100644 --- a/drivers/crypto/bcm/cipher.h +++ b/drivers/crypto/bcm/cipher.h @@ -420,7 +420,7 @@ struct spu_hw { u32 num_chan; }; -struct device_private { +struct bcm_device_private { struct platform_device *pdev; struct spu_hw spu; @@ -467,6 +467,6 @@ struct device_private { struct mbox_chan **mbox; }; -extern struct device_private iproc_priv; +extern struct bcm_device_private iproc_priv; #endif diff --git a/drivers/crypto/bcm/util.c b/drivers/crypto/bcm/util.c index 2b304fc78059..77aeedb84055 100644 --- a/drivers/crypto/bcm/util.c +++ b/drivers/crypto/bcm/util.c @@ -348,7 +348,7 @@ char *spu_alg_name(enum spu_cipher_alg alg, enum spu_cipher_mode mode) static ssize_t spu_debugfs_read(struct file *filp, char __user *ubuf, size_t count, loff_t *offp) { - struct device_private *ipriv; + struct bcm_device_private *ipriv; char *buf; ssize_t ret, out_offset, out_count; int i; diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 846a3d90b41a..77783feb62b2 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -11,7 +11,7 @@ config CRYPTO_DEV_QAT select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 - select CRYPTO_AES + select CRYPTO_LIB_AES select FW_LOADER config CRYPTO_DEV_QAT_DH895xCC diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4fd85f31630a..25c9f825b8b5 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1093,11 +1093,12 @@ static void ipsec_esp_decrypt_hwauth_done(struct device *dev, */ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, unsigned int offset, int datalen, int elen, - struct talitos_ptr *link_tbl_ptr) + struct talitos_ptr *link_tbl_ptr, int align) { int n_sg = elen ? sg_count + 1 : sg_count; int count = 0; int cryptlen = datalen + elen; + int padding = ALIGN(cryptlen, align) - cryptlen; while (cryptlen && sg && n_sg--) { unsigned int len = sg_dma_len(sg); @@ -1121,7 +1122,7 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, offset += datalen; } to_talitos_ptr(link_tbl_ptr + count, - sg_dma_address(sg) + offset, len, 0); + sg_dma_address(sg) + offset, sg_next(sg) ? len : len + padding, 0); to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; @@ -1144,10 +1145,11 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src, unsigned int len, struct talitos_edesc *edesc, struct talitos_ptr *ptr, int sg_count, unsigned int offset, int tbl_off, int elen, - bool force) + bool force, int align) { struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int aligned_len = ALIGN(len, align); if (!src) { to_talitos_ptr(ptr, 0, 0, is_sec1); @@ -1155,22 +1157,22 @@ static int talitos_sg_map_ext(struct device *dev, struct scatterlist *src, } to_talitos_ptr_ext_set(ptr, elen, is_sec1); if (sg_count == 1 && !force) { - to_talitos_ptr(ptr, sg_dma_address(src) + offset, len, is_sec1); + to_talitos_ptr(ptr, sg_dma_address(src) + offset, aligned_len, is_sec1); return sg_count; } if (is_sec1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, len, is_sec1); + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, aligned_len, is_sec1); return sg_count; } sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, elen, - &edesc->link_tbl[tbl_off]); + &edesc->link_tbl[tbl_off], align); if (sg_count == 1 && !force) { /* Only one segment now, so no link tbl needed*/ copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); return sg_count; } to_talitos_ptr(ptr, edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), len, is_sec1); + tbl_off * sizeof(struct talitos_ptr), aligned_len, is_sec1); to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); return sg_count; @@ -1182,7 +1184,7 @@ static int talitos_sg_map(struct device *dev, struct scatterlist *src, unsigned int offset, int tbl_off) { return talitos_sg_map_ext(dev, src, len, edesc, ptr, sg_count, offset, - tbl_off, 0, false); + tbl_off, 0, false, 1); } /* @@ -1251,7 +1253,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, ret = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[4], sg_count, areq->assoclen, tbl_off, elen, - false); + false, 1); if (ret > 1) { tbl_off += ret; @@ -1271,7 +1273,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, elen = 0; ret = talitos_sg_map_ext(dev, areq->dst, cryptlen, edesc, &desc->ptr[5], sg_count, areq->assoclen, tbl_off, elen, - is_ipsec_esp && !encrypt); + is_ipsec_esp && !encrypt, 1); tbl_off += ret; if (!encrypt && is_ipsec_esp) { @@ -1577,6 +1579,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc, bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + bool is_ctr = (desc->hdr & DESC_HDR_SEL0_MASK) == DESC_HDR_SEL0_AESU && + (desc->hdr & DESC_HDR_MODE0_AESU_MASK) == DESC_HDR_MODE0_AESU_CTR; /* first DWORD empty */ @@ -1597,8 +1601,8 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* * cipher in */ - sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, - &desc->ptr[3], sg_count, 0, 0); + sg_count = talitos_sg_map_ext(dev, areq->src, cryptlen, edesc, &desc->ptr[3], + sg_count, 0, 0, 0, false, is_ctr ? 16 : 1); if (sg_count > 1) sync_needed = true; @@ -2761,6 +2765,22 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_SEL0_AESU | DESC_HDR_MODE0_AESU_CTR, }, + { .type = CRYPTO_ALG_TYPE_SKCIPHER, + .alg.skcipher = { + .base.cra_name = "ctr(aes)", + .base.cra_driver_name = "ctr-aes-talitos", + .base.cra_blocksize = 1, + .base.cra_flags = CRYPTO_ALG_ASYNC | + CRYPTO_ALG_ALLOCATES_MEMORY, + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = skcipher_aes_setkey, + }, + .desc_hdr_template = DESC_HDR_TYPE_COMMON_NONSNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CTR, + }, { .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des)", @@ -3178,6 +3198,12 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, t_alg->algt.alg.skcipher.setkey ?: skcipher_setkey; t_alg->algt.alg.skcipher.encrypt = skcipher_encrypt; t_alg->algt.alg.skcipher.decrypt = skcipher_decrypt; + if (!strcmp(alg->cra_name, "ctr(aes)") && !has_ftr_sec1(priv) && + DESC_TYPE(t_alg->algt.desc_hdr_template) != + DESC_TYPE(DESC_HDR_TYPE_AESU_CTR_NONSNOOP)) { + devm_kfree(dev, t_alg); + return ERR_PTR(-ENOTSUPP); + } break; case CRYPTO_ALG_TYPE_AEAD: alg = &t_alg->algt.alg.aead.base; diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 1469b956948a..32825119e880 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -344,6 +344,7 @@ static inline bool has_ftr_sec1(struct talitos_private *priv) /* primary execution unit mode (MODE0) and derivatives */ #define DESC_HDR_MODE0_ENCRYPT cpu_to_be32(0x00100000) +#define DESC_HDR_MODE0_AESU_MASK cpu_to_be32(0x00600000) #define DESC_HDR_MODE0_AESU_CBC cpu_to_be32(0x00200000) #define DESC_HDR_MODE0_AESU_CTR cpu_to_be32(0x00600000) #define DESC_HDR_MODE0_DEU_CBC cpu_to_be32(0x00400000) diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 737b207c9e30..3003558c1a8b 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -1038,7 +1038,7 @@ static ssize_t range_parse(const char *opt, size_t len, struct range *range) { unsigned long long addr = 0; char *start, *end, *str; - ssize_t rc = EINVAL; + ssize_t rc = -EINVAL; str = kstrdup(opt, GFP_KERNEL); if (!str) diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 0feb323bae1e..f8459cc5315d 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -1214,6 +1214,7 @@ static int fsldma_of_probe(struct platform_device *op) { struct fsldma_device *fdev; struct device_node *child; + unsigned int i; int err; fdev = kzalloc(sizeof(*fdev), GFP_KERNEL); @@ -1292,6 +1293,10 @@ static int fsldma_of_probe(struct platform_device *op) return 0; out_free_fdev: + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) { + if (fdev->chan[i]) + fsl_dma_chan_remove(fdev->chan[i]); + } irq_dispose_mapping(fdev->irq); iounmap(fdev->regs); out_free: @@ -1314,6 +1319,7 @@ static int fsldma_of_remove(struct platform_device *op) if (fdev->chan[i]) fsl_dma_chan_remove(fdev->chan[i]); } + irq_dispose_mapping(fdev->irq); iounmap(fdev->regs); kfree(fdev); diff --git a/drivers/dma/hsu/pci.c b/drivers/dma/hsu/pci.c index 07cc7320a614..9045a6f7f589 100644 --- a/drivers/dma/hsu/pci.c +++ b/drivers/dma/hsu/pci.c @@ -26,22 +26,12 @@ static irqreturn_t hsu_pci_irq(int irq, void *dev) { struct hsu_dma_chip *chip = dev; - struct pci_dev *pdev = to_pci_dev(chip->dev); u32 dmaisr; u32 status; unsigned short i; int ret = 0; int err; - /* - * On Intel Tangier B0 and Anniedale the interrupt line, disregarding - * to have different numbers, is shared between HSU DMA and UART IPs. - * Thus on such SoCs we are expecting that IRQ handler is called in - * UART driver only. - */ - if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA) - return IRQ_HANDLED; - dmaisr = readl(chip->regs + HSU_PCI_DMAISR); for (i = 0; i < chip->hsu->nr_channels; i++) { if (dmaisr & 0x1) { @@ -105,6 +95,17 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) goto err_register_irq; + /* + * On Intel Tangier B0 and Anniedale the interrupt line, disregarding + * to have different numbers, is shared between HSU DMA and UART IPs. + * Thus on such SoCs we are expecting that IRQ handler is called in + * UART driver only. Instead of handling the spurious interrupt + * from HSU DMA here and waste CPU time and delay HSU UART interrupt + * handling, disable the interrupt entirely. + */ + if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA) + disable_irq_nosync(chip->irq); + pci_set_drvdata(pdev, chip); return 0; diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c index 71fd6e4c42cd..a15e50126434 100644 --- a/drivers/dma/idxd/dma.c +++ b/drivers/dma/idxd/dma.c @@ -165,6 +165,7 @@ int idxd_register_dma_device(struct idxd_device *idxd) INIT_LIST_HEAD(&dma->channels); dma->dev = &idxd->pdev->dev; + dma_cap_set(DMA_PRIVATE, dma->cap_mask); dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask); dma->device_release = idxd_dma_release; diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c index 9fede32641e9..04202d75f4ee 100644 --- a/drivers/dma/owl-dma.c +++ b/drivers/dma/owl-dma.c @@ -1245,6 +1245,7 @@ static int owl_dma_remove(struct platform_device *pdev) owl_dma_free(od); clk_disable_unprepare(od->clk); + dma_pool_destroy(od->lli_pool); return 0; } diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c index 1a0bf6b0567a..e48eb397f433 100644 --- a/drivers/dma/qcom/gpi.c +++ b/drivers/dma/qcom/gpi.c @@ -584,7 +584,7 @@ static inline void gpi_write_reg_field(struct gpii *gpii, void __iomem *addr, gpi_write_reg(gpii, addr, val); } -static inline void +static __always_inline void gpi_update_reg(struct gpii *gpii, u32 offset, u32 mask, u32 val) { void __iomem *addr = gpii->regs + offset; diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c index f474a1232335..46bc1a419bdf 100644 --- a/drivers/dma/ti/k3-udma.c +++ b/drivers/dma/ti/k3-udma.c @@ -4306,6 +4306,7 @@ static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud) ud->bchan_cnt = BCDMA_CAP2_BCHAN_CNT(cap2); ud->tchan_cnt = BCDMA_CAP2_TCHAN_CNT(cap2); ud->rchan_cnt = BCDMA_CAP2_RCHAN_CNT(cap2); + ud->rflow_cnt = ud->rchan_cnt; break; case DMA_TYPE_PKTDMA: cap4 = udma_read(ud->mmrs[MMR_GCFG], 0x30); diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index f7087ddddb90..5754f429a8d2 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3342,10 +3342,13 @@ static struct amd64_family_type *per_family_init(struct amd64_pvt *pvt) fam_type = &family_types[F15_M60H_CPUS]; pvt->ops = &family_types[F15_M60H_CPUS].ops; break; + /* Richland is only client */ + } else if (pvt->model == 0x13) { + return NULL; + } else { + fam_type = &family_types[F15_CPUS]; + pvt->ops = &family_types[F15_CPUS].ops; } - - fam_type = &family_types[F15_CPUS]; - pvt->ops = &family_types[F15_CPUS].ops; break; case 0x16: @@ -3539,6 +3542,7 @@ static int probe_one_instance(unsigned int nid) pvt->mc_node_id = nid; pvt->F3 = F3; + ret = -ENODEV; fam_type = per_family_init(pvt); if (!fam_type) goto err_enable; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 5392e1fc6b4e..cacdf1589b10 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -848,8 +848,6 @@ static int scmi_remove(struct platform_device *pdev) struct scmi_info *info = platform_get_drvdata(pdev); struct idr *idr = &info->tx_idr; - scmi_notification_exit(&info->handle); - mutex_lock(&scmi_list_mutex); if (info->users) ret = -EBUSY; @@ -860,6 +858,8 @@ static int scmi_remove(struct platform_device *pdev) if (ret) return ret; + scmi_notification_exit(&info->handle); + /* Safe to free channels since no more users */ ret = idr_for_each(idr, info->desc->ops->chan_free, idr); idr_destroy(&info->tx_idr); diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index df3f9bcab581..4b7ee3fa9224 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -927,7 +927,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) } /* first try to find a slot in an existing linked list entry */ - for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) { + for (prsv = efi_memreserve_root->next; prsv; ) { rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB); index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size); if (index < rsv->size) { @@ -937,6 +937,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size) memunmap(rsv); return efi_mem_reserve_iomem(addr, size); } + prsv = rsv->next; memunmap(rsv); } diff --git a/drivers/firmware/efi/libstub/efi-stub.c b/drivers/firmware/efi/libstub/efi-stub.c index ec2f3985bef3..26e69788f27a 100644 --- a/drivers/firmware/efi/libstub/efi-stub.c +++ b/drivers/firmware/efi/libstub/efi-stub.c @@ -96,6 +96,18 @@ static void install_memreserve_table(void) efi_err("Failed to install memreserve config table!\n"); } +static u32 get_supported_rt_services(void) +{ + const efi_rt_properties_table_t *rt_prop_table; + u32 supported = EFI_RT_SUPPORTED_ALL; + + rt_prop_table = get_efi_config_table(EFI_RT_PROPERTIES_TABLE_GUID); + if (rt_prop_table) + supported &= rt_prop_table->runtime_services_supported; + + return supported; +} + /* * EFI entry point for the arm/arm64 EFI stubs. This is the entrypoint * that is described in the PE/COFF header. Most of the code is the same @@ -250,6 +262,10 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle, (prop_tbl->memory_protection_attribute & EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA); + /* force efi_novamap if SetVirtualAddressMap() is unsupported */ + efi_novamap |= !(get_supported_rt_services() & + EFI_RT_SUPPORTED_SET_VIRTUAL_ADDRESS_MAP); + /* hibernation expects the runtime regions to stay in the same place */ if (!IS_ENABLED(CONFIG_HIBERNATION) && !efi_nokaslr && !flat_va_mapping) { /* diff --git a/drivers/firmware/efi/vars.c b/drivers/firmware/efi/vars.c index 41c1d00bf933..abdc8a6a3963 100644 --- a/drivers/firmware/efi/vars.c +++ b/drivers/firmware/efi/vars.c @@ -484,6 +484,10 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *), } } + break; + case EFI_UNSUPPORTED: + err = -EOPNOTSUPP; + status = EFI_NOT_FOUND; break; case EFI_NOT_FOUND: break; diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 825b362eb4b7..6898c27f71f8 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -112,8 +112,29 @@ MODULE_DEVICE_TABLE(i2c, pca953x_id); #ifdef CONFIG_GPIO_PCA953X_IRQ #include -#include -#include + +static const struct acpi_gpio_params pca953x_irq_gpios = { 0, 0, true }; + +static const struct acpi_gpio_mapping pca953x_acpi_irq_gpios[] = { + { "irq-gpios", &pca953x_irq_gpios, 1, ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER }, + { } +}; + +static int pca953x_acpi_get_irq(struct device *dev) +{ + int ret; + + ret = devm_acpi_dev_add_driver_gpios(dev, pca953x_acpi_irq_gpios); + if (ret) + dev_warn(dev, "can't add GPIO ACPI mapping\n"); + + ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0); + if (ret < 0) + return ret; + + dev_info(dev, "ACPI interrupt quirk (IRQ %d)\n", ret); + return ret; +} static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = { { @@ -132,59 +153,6 @@ static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = { }, {} }; - -#ifdef CONFIG_ACPI -static int pca953x_acpi_get_pin(struct acpi_resource *ares, void *data) -{ - struct acpi_resource_gpio *agpio; - int *pin = data; - - if (acpi_gpio_get_irq_resource(ares, &agpio)) - *pin = agpio->pin_table[0]; - return 1; -} - -static int pca953x_acpi_find_pin(struct device *dev) -{ - struct acpi_device *adev = ACPI_COMPANION(dev); - int pin = -ENOENT, ret; - LIST_HEAD(r); - - ret = acpi_dev_get_resources(adev, &r, pca953x_acpi_get_pin, &pin); - acpi_dev_free_resource_list(&r); - if (ret < 0) - return ret; - - return pin; -} -#else -static inline int pca953x_acpi_find_pin(struct device *dev) { return -ENXIO; } -#endif - -static int pca953x_acpi_get_irq(struct device *dev) -{ - int pin, ret; - - pin = pca953x_acpi_find_pin(dev); - if (pin < 0) - return pin; - - dev_info(dev, "Applying ACPI interrupt quirk (GPIO %d)\n", pin); - - if (!gpio_is_valid(pin)) - return -EINVAL; - - ret = gpio_request(pin, "pca953x interrupt"); - if (ret) - return ret; - - ret = gpio_to_irq(pin); - - /* When pin is used as an IRQ, no need to keep it requested */ - gpio_free(pin); - - return ret; -} #endif static const struct acpi_device_id pca953x_acpi_ids[] = { diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index a2a8d155c75e..b7568ee33696 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -332,7 +332,7 @@ static int pcf857x_probe(struct i2c_client *client, * reset state. Otherwise it flags pins to be driven low. */ gpio->out = ~n_latch; - gpio->status = gpio->out; + gpio->status = gpio->read(gpio->client); /* Enable irqchip if we have an interrupt */ if (client->irq) { diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index e37a57d0a2f0..1aacd2a5a1fd 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -174,7 +174,7 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio, int ret, value; ret = request_threaded_irq(event->irq, NULL, event->handler, - event->irqflags, "ACPI:Event", event); + event->irqflags | IRQF_ONESHOT, "ACPI:Event", event); if (ret) { dev_err(acpi_gpio->chip->parent, "Failed to setup interrupt handler for %d\n", @@ -677,6 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (!lookup->desc) { const struct acpi_resource_gpio *agpio = &ares->data.gpio; bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT; + struct gpio_desc *desc; u16 pin_index; if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint) @@ -689,8 +690,12 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data) if (pin_index >= agpio->pin_table_length) return 1; - lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr, + if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER) + desc = gpio_to_desc(agpio->pin_table[pin_index]); + else + desc = acpi_get_gpiod(agpio->resource_source.string_ptr, agpio->pin_table[pin_index]); + lookup->desc = desc; lookup->info.pin_config = agpio->pin_config; lookup->info.debounce = agpio->debounce_timeout; lookup->info.gpioint = gpioint; @@ -940,8 +945,9 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, } /** - * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number + * acpi_dev_gpio_irq_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from + * @name: optional name of GpioInt resource * @index: index of GpioInt resource (starting from %0) * * If the device has one or more GpioInt resources, this function can be @@ -951,9 +957,12 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, * The function is idempotent, though each time it runs it will configure GPIO * pin direction according to the flags in GpioInt resource. * + * The function takes optional @name parameter. If the resource has a property + * name, then only those will be taken into account. + * * Return: Linux IRQ number (> %0) on success, negative errno on failure. */ -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index) { int idx, i; unsigned int irq_flags; @@ -963,7 +972,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) struct acpi_gpio_info info; struct gpio_desc *desc; - desc = acpi_get_gpiod_by_index(adev, NULL, i, &info); + desc = acpi_get_gpiod_by_index(adev, name, i, &info); /* Ignore -EPROBE_DEFER, it only matters if idx matches */ if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER) @@ -1008,7 +1017,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) } return -ENOENT; } -EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get); +EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get_by); static acpi_status acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 97eec8d8dbdc..a4a47305574c 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -473,8 +473,12 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodevice_release(struct device *dev) { struct gpio_device *gdev = dev_get_drvdata(dev); + unsigned long flags; + spin_lock_irqsave(&gpio_lock, flags); list_del(&gdev->list); + spin_unlock_irqrestore(&gpio_lock, flags); + ida_free(&gpio_ida, gdev->id); kfree_const(gdev->label); kfree(gdev->descs); @@ -569,6 +573,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, struct lock_class_key *lock_key, struct lock_class_key *request_key) { + struct fwnode_handle *fwnode = gc->parent ? dev_fwnode(gc->parent) : NULL; unsigned long flags; int ret = 0; unsigned i; @@ -598,6 +603,12 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, gc->of_node = gdev->dev.of_node; #endif + /* + * Assign fwnode depending on the result of the previous calls, + * if none of them succeed, assign it to the parent's one. + */ + gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; + gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { ret = gdev->id; diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 0973f408d75f..f0c0ccdc8a10 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -15,6 +15,9 @@ menuconfig DRM select I2C_ALGOBIT select DMA_SHARED_BUFFER select SYNC_FILE +# gallium uses SYS_kcmp for os_same_file_description() to de-duplicate +# device and dmabuf fd. Let's make sure that is available for our userspace. + select KCMP help Kernel-level support for the Direct Rendering Infrastructure (DRI) introduced in XFree86 4.0. If you say Y here, you need to select @@ -229,6 +232,7 @@ source "drivers/gpu/drm/arm/Kconfig" config DRM_RADEON tristate "ATI Radeon" depends on DRM && PCI && MMU + depends on AGP || !AGP select FW_LOADER select DRM_KMS_HELPER select DRM_TTM diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5993dd0fdd8e..ccdf508aca47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -179,6 +179,7 @@ extern uint amdgpu_smu_memory_pool_size; extern uint amdgpu_dc_feature_mask; extern uint amdgpu_dc_debug_mask; extern uint amdgpu_dm_abm_level; +extern int amdgpu_backlight; extern struct amdgpu_mgpu_info mgpu_info; extern int amdgpu_ras_enable; extern uint amdgpu_ras_mask; @@ -1003,6 +1004,12 @@ struct amdgpu_device { bool in_suspend; bool in_hibernate; + /* + * The combination flag in_poweroff_reboot_com used to identify the poweroff + * and reboot opt in the s0i3 system-wide suspend. + */ + bool in_poweroff_reboot_com; + atomic_t in_gpu_reset; enum pp_mp1_state mp1_state; struct rw_semaphore reset_sem; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 8155c54392c8..2e9b16fb3fcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) */ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) { +#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE) if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) { if (adev->flags & AMD_IS_APU) return true; } - +#endif return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a6667a2ca0db..c2190c3e97f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -356,7 +356,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, while (size) { uint32_t value; - value = RREG32_PCIE(*pos >> 2); + value = RREG32_PCIE(*pos); r = put_user(value, (uint32_t *)buf); if (r) { pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); @@ -423,7 +423,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user return r; } - WREG32_PCIE(*pos >> 2, value); + WREG32_PCIE(*pos, value); result += 4; buf += 4; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cab1ebaf6d62..eacfca776249 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2666,7 +2666,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) { + if (adev->in_poweroff_reboot_com || adev->in_hibernate || + !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) { amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); } @@ -3726,7 +3727,12 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_fence_driver_suspend(adev); - if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) + /* + * TODO: Need figure out the each GNB IP idle off dependency and then + * improve the AMDGPU suspend/resume sequence for system-wide Sx entry/exit. + */ + if (adev->in_poweroff_reboot_com || adev->in_hibernate || + !amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) r = amdgpu_device_ip_suspend_phase2(adev); else amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 7169fb5e3d9c..82cb2ade83b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -777,6 +777,10 @@ uint amdgpu_dm_abm_level; MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) "); module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); +int amdgpu_backlight = -1; +MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 auto (default))"); +module_param_named(backlight, amdgpu_backlight, bint, 0444); + /** * DOC: tmz (int) * Trusted Memory Zone (TMZ) is a method to protect data being written @@ -1098,6 +1102,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, + {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, /* Van Gogh */ @@ -1266,7 +1271,9 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) */ if (!amdgpu_passthrough(adev)) adev->mp1_state = PP_MP1_STATE_UNLOAD; + adev->in_poweroff_reboot_com = true; amdgpu_device_ip_suspend(adev); + adev->in_poweroff_reboot_com = false; adev->mp1_state = PP_MP1_STATE_NONE; } @@ -1308,8 +1315,13 @@ static int amdgpu_pmops_thaw(struct device *dev) static int amdgpu_pmops_poweroff(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - return amdgpu_device_suspend(drm_dev, true); + adev->in_poweroff_reboot_com = true; + r = amdgpu_device_suspend(drm_dev, true); + adev->in_poweroff_reboot_com = false; + return r; } static int amdgpu_pmops_restore(struct device *dev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 0bf7d36c6686..5b716404eee1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, size = mode_cmd->pitches[0] * height; aligned_size = ALIGN(size, PAGE_SIZE); ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags, - ttm_bo_type_kernel, NULL, &gobj); + ttm_bo_type_device, NULL, &gobj); if (ret) { pr_err("failed to allocate framebuffer (%d)\n", aligned_size); return -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 82e952696d24..1fb2a91ad30a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -846,7 +846,7 @@ static int amdgpu_ras_error_inject_xgmi(struct amdgpu_device *adev, if (amdgpu_dpm_allow_xgmi_power_down(adev, true)) dev_warn(adev->dev, "Failed to allow XGMI power down"); - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) + if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) dev_warn(adev->dev, "Failed to allow df cstate"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 6752d8b13118..ce8dc995c10c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_AMDGPU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_AMDGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _AMDGPU_TRACE_H_ #include diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 2d51b7694d1f..572153d08ad1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -560,10 +560,14 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work); + int ret; - amdgpu_virt_read_pf2vf_data(adev); + ret = amdgpu_virt_read_pf2vf_data(adev); + if (ret) + goto out; amdgpu_virt_write_vf2pf_data(adev); +out: schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms); } diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index da37f8a900af..307c01301c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -194,19 +194,30 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index d86b42a36560..e7d6da05011f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -71,6 +71,11 @@ #define GB_ADDR_CONFIG__NUM_PKRS__SHIFT 0x8 #define GB_ADDR_CONFIG__NUM_PKRS_MASK 0x00000700L +#define mmCGTS_TCC_DISABLE_gc_10_3 0x5006 +#define mmCGTS_TCC_DISABLE_gc_10_3_BASE_IDX 1 +#define mmCGTS_USER_TCC_DISABLE_gc_10_3 0x5007 +#define mmCGTS_USER_TCC_DISABLE_gc_10_3_BASE_IDX 1 + #define mmCP_MEC_CNTL_Sienna_Cichlid 0x0f55 #define mmCP_MEC_CNTL_Sienna_Cichlid_BASE_IDX 0 #define mmRLC_SAFE_MODE_Sienna_Cichlid 0x4ca0 @@ -99,10 +104,6 @@ #define mmGCR_GENERAL_CNTL_Sienna_Cichlid 0x1580 #define mmGCR_GENERAL_CNTL_Sienna_Cichlid_BASE_IDX 0 -#define mmCGTS_TCC_DISABLE_Vangogh 0x5006 -#define mmCGTS_TCC_DISABLE_Vangogh_BASE_IDX 1 -#define mmCGTS_USER_TCC_DISABLE_Vangogh 0x5007 -#define mmCGTS_USER_TCC_DISABLE_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh 0x0025 #define mmGOLDEN_TSC_COUNT_UPPER_Vangogh_BASE_IDX 1 #define mmGOLDEN_TSC_COUNT_LOWER_Vangogh 0x0026 @@ -4942,15 +4943,12 @@ static void gfx_v10_0_get_tcc_info(struct amdgpu_device *adev) /* TCCs are global (not instanced). */ uint32_t tcc_disable; - switch (adev->asic_type) { - case CHIP_VANGOGH: - tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_Vangogh) | - RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_Vangogh); - break; - default: + if (adev->asic_type >= CHIP_SIENNA_CICHLID) { + tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE_gc_10_3) | + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE_gc_10_3); + } else { tcc_disable = RREG32_SOC15(GC, 0, mmCGTS_TCC_DISABLE) | - RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); - break; + RREG32_SOC15(GC, 0, mmCGTS_USER_TCC_DISABLE); } adev->gfx.config.tcc_disabled_mask = diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 37d8b6ca4dab..cc957471f31e 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -194,19 +194,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 6bee3677394a..22b96b7d3647 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -498,7 +498,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev) { if ((pdev->device == 0x731E && (pdev->revision == 0xC6 || pdev->revision == 0xC7)) || - (pdev->device == 0x7340 && pdev->revision == 0xC9)) + (pdev->device == 0x7340 && pdev->revision == 0xC9) || + (pdev->device == 0x7360 && pdev->revision == 0xC7)) return true; return false; } @@ -568,7 +569,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT && !amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); + if (!nv_is_headless_sku(adev->pdev)) + amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block); if (!amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block); break; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 0b3516c4eefb..b2a93d801082 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -241,6 +241,8 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) { u32 reference_clock = adev->clock.spll.reference_freq; + if (adev->asic_type == CHIP_RENOIR) + return 10000; if (adev->asic_type == CHIP_RAVEN) return reference_clock / 4; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index ce3319993b4b..249fcbee7871 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -196,19 +196,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 16262e5d93f5..7351dd195274 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -243,11 +243,11 @@ get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd) static inline void dqm_lock(struct device_queue_manager *dqm) { mutex_lock(&dqm->lock_hidden); - dqm->saved_flags = memalloc_nofs_save(); + dqm->saved_flags = memalloc_noreclaim_save(); } static inline void dqm_unlock(struct device_queue_manager *dqm) { - memalloc_nofs_restore(dqm->saved_flags); + memalloc_noreclaim_restore(dqm->saved_flags); mutex_unlock(&dqm->lock_hidden); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 961abf1cf040..ad4afbc37d51 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -1131,7 +1131,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev) #ifdef CONFIG_DRM_AMD_DC_HDCP if (adev->dm.hdcp_workqueue) { - hdcp_destroy(adev->dm.hdcp_workqueue); + hdcp_destroy(&adev->dev->kobj, adev->dm.hdcp_workqueue); adev->dm.hdcp_workqueue = NULL; } @@ -1934,7 +1934,7 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state, dc_commit_updates_for_stream( dm->dc, bundle->surface_updates, dc_state->stream_status->plane_count, - dc_state->streams[k], &bundle->stream_update); + dc_state->streams[k], &bundle->stream_update, dc_state); } cleanup: @@ -1965,7 +1965,8 @@ static void dm_set_dpms_off(struct dc_link *link) stream_update.stream = stream_state; dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0, - stream_state, &stream_update); + stream_state, &stream_update, + stream_state->ctx->dc->current_state); mutex_unlock(&adev->dm.dc_lock); } @@ -2208,6 +2209,11 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector) caps->ext_caps->bits.hdr_aux_backlight_control == 1) caps->aux_support = true; + if (amdgpu_backlight == 0) + caps->aux_support = false; + else if (amdgpu_backlight == 1) + caps->aux_support = true; + /* From the specification (CTA-861-G), for calculating the maximum * luminance we need to use: * Luminance = 50*2**(CV/32) @@ -3126,19 +3132,6 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm) #endif } -static int set_backlight_via_aux(struct dc_link *link, uint32_t brightness) -{ - bool rc; - - if (!link) - return 1; - - rc = dc_link_set_backlight_level_nits(link, true, brightness, - AUX_BL_DEFAULT_TRANSITION_TIME_MS); - - return rc ? 0 : 1; -} - static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps, unsigned *min, unsigned *max) { @@ -3201,9 +3194,10 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) brightness = convert_brightness_from_user(&caps, bd->props.brightness); // Change brightness based on AUX property if (caps.aux_support) - return set_backlight_via_aux(link, brightness); - - rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0); + rc = dc_link_set_backlight_level_nits(link, true, brightness, + AUX_BL_DEFAULT_TRANSITION_TIME_MS); + else + rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0); return rc ? 0 : 1; } @@ -3211,11 +3205,27 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd) static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd) { struct amdgpu_display_manager *dm = bl_get_data(bd); - int ret = dc_link_get_backlight_level(dm->backlight_link); + struct amdgpu_dm_backlight_caps caps; + + amdgpu_dm_update_backlight_caps(dm); + caps = dm->backlight_caps; + + if (caps.aux_support) { + struct dc_link *link = (struct dc_link *)dm->backlight_link; + u32 avg, peak; + bool rc; + + rc = dc_link_get_backlight_level_nits(link, &avg, &peak); + if (!rc) + return bd->props.brightness; + return convert_brightness_to_user(&caps, avg); + } else { + int ret = dc_link_get_backlight_level(dm->backlight_link); - if (ret == DC_ERROR_UNEXPECTED) - return bd->props.brightness; - return convert_brightness_to_user(&dm->backlight_caps, ret); + if (ret == DC_ERROR_UNEXPECTED) + return bd->props.brightness; + return convert_brightness_to_user(&caps, ret); + } } static const struct backlight_ops amdgpu_dm_backlight_ops = { @@ -4606,6 +4616,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev, dc_plane_state->global_alpha_value = plane_info.global_alpha_value; dc_plane_state->dcc = plane_info.dcc; dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0 + dc_plane_state->flip_int_enabled = true; /* * Always set input transfer function, since plane state is refreshed @@ -7548,7 +7559,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, struct drm_crtc *pcrtc, bool wait_for_vblank) { - int i; + uint32_t i; uint64_t timestamp_ns; struct drm_plane *plane; struct drm_plane_state *old_plane_state, *new_plane_state; @@ -7589,7 +7600,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, amdgpu_dm_commit_cursors(state); /* update planes when needed */ - for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) { + for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) { struct drm_crtc *crtc = new_plane_state->crtc; struct drm_crtc_state *new_crtc_state; struct drm_framebuffer *fb = new_plane_state->fb; @@ -7812,7 +7823,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state, bundle->surface_updates, planes_count, acrtc_state->stream, - &bundle->stream_update); + &bundle->stream_update, + dc_state); /** * Enable or disable the interrupts on the backend. @@ -8148,13 +8160,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state); struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state); struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc); - struct dc_surface_update surface_updates[MAX_SURFACES]; + struct dc_surface_update dummy_updates[MAX_SURFACES]; struct dc_stream_update stream_update; struct dc_info_packet hdr_packet; struct dc_stream_status *status = NULL; bool abm_changed, hdr_changed, scaling_changed; - memset(&surface_updates, 0, sizeof(surface_updates)); + memset(&dummy_updates, 0, sizeof(dummy_updates)); memset(&stream_update, 0, sizeof(stream_update)); if (acrtc) { @@ -8211,15 +8223,16 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) * To fix this, DC should permit updating only stream properties. */ for (j = 0; j < status->plane_count; j++) - surface_updates[j].surface = status->plane_states[j]; + dummy_updates[j].surface = status->plane_states[0]; mutex_lock(&dm->dc_lock); dc_commit_updates_for_stream(dm->dc, - surface_updates, + dummy_updates, status->plane_count, dm_new_crtc_state->stream, - &stream_update); + &stream_update, + dc_state); mutex_unlock(&dm->dc_lock); } diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c index c2cd184f0bbd..79de68ac03f2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c @@ -376,7 +376,7 @@ static void event_cpirq(struct work_struct *work) } -void hdcp_destroy(struct hdcp_workqueue *hdcp_work) +void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *hdcp_work) { int i = 0; @@ -385,6 +385,7 @@ void hdcp_destroy(struct hdcp_workqueue *hdcp_work) cancel_delayed_work_sync(&hdcp_work[i].watchdog_timer_dwork); } + sysfs_remove_bin_file(kobj, &hdcp_work[0].attr); kfree(hdcp_work->srm); kfree(hdcp_work->srm_temp); kfree(hdcp_work); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h index 5159b3a5e5b0..09294ff122fe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.h @@ -69,7 +69,7 @@ void hdcp_update_display(struct hdcp_workqueue *hdcp_work, void hdcp_reset_display(struct hdcp_workqueue *work, unsigned int link_index); void hdcp_handle_cpirq(struct hdcp_workqueue *work, unsigned int link_index); -void hdcp_destroy(struct hdcp_workqueue *work); +void hdcp_destroy(struct kobject *kobj, struct hdcp_workqueue *work); struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct cp_psp *cp_psp, struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table.c b/drivers/gpu/drm/amd/display/dc/bios/command_table.c index 070459e3e407..afc10b954ffa 100644 --- a/drivers/gpu/drm/amd/display/dc/bios/command_table.c +++ b/drivers/gpu/drm/amd/display/dc/bios/command_table.c @@ -245,6 +245,23 @@ static enum bp_result encoder_control_digx_v3( cntl->enable_dp_audio); params.ucLaneNum = (uint8_t)(cntl->lanes_number); + switch (cntl->color_depth) { + case COLOR_DEPTH_888: + params.ucBitPerColor = PANEL_8BIT_PER_COLOR; + break; + case COLOR_DEPTH_101010: + params.ucBitPerColor = PANEL_10BIT_PER_COLOR; + break; + case COLOR_DEPTH_121212: + params.ucBitPerColor = PANEL_12BIT_PER_COLOR; + break; + case COLOR_DEPTH_161616: + params.ucBitPerColor = PANEL_16BIT_PER_COLOR; + break; + default: + break; + } + if (EXEC_BIOS_CMD_TABLE(DIGxEncoderControl, params)) result = BP_RESULT_OK; @@ -274,6 +291,23 @@ static enum bp_result encoder_control_digx_v4( cntl->enable_dp_audio)); params.ucLaneNum = (uint8_t)(cntl->lanes_number); + switch (cntl->color_depth) { + case COLOR_DEPTH_888: + params.ucBitPerColor = PANEL_8BIT_PER_COLOR; + break; + case COLOR_DEPTH_101010: + params.ucBitPerColor = PANEL_10BIT_PER_COLOR; + break; + case COLOR_DEPTH_121212: + params.ucBitPerColor = PANEL_12BIT_PER_COLOR; + break; + case COLOR_DEPTH_161616: + params.ucBitPerColor = PANEL_16BIT_PER_COLOR; + break; + default: + break; + } + if (EXEC_BIOS_CMD_TABLE(DIGxEncoderControl, params)) result = BP_RESULT_OK; @@ -1057,6 +1091,19 @@ static enum bp_result set_pixel_clock_v5( * driver choose program it itself, i.e. here we program it * to 888 by default. */ + if (bp_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) + switch (bp_params->color_depth) { + case TRANSMITTER_COLOR_DEPTH_30: + /* yes this is correct, the atom define is wrong */ + clk.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_32BPP; + break; + case TRANSMITTER_COLOR_DEPTH_36: + /* yes this is correct, the atom define is wrong */ + clk.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP; + break; + default: + break; + } if (EXEC_BIOS_CMD_TABLE(SetPixelClock, clk)) result = BP_RESULT_OK; @@ -1135,6 +1182,20 @@ static enum bp_result set_pixel_clock_v6( * driver choose program it itself, i.e. here we pass required * target rate that includes deep color. */ + if (bp_params->signal_type == SIGNAL_TYPE_HDMI_TYPE_A) + switch (bp_params->color_depth) { + case TRANSMITTER_COLOR_DEPTH_30: + clk.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP_V6; + break; + case TRANSMITTER_COLOR_DEPTH_36: + clk.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP_V6; + break; + case TRANSMITTER_COLOR_DEPTH_48: + clk.sPCLKInput.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP; + break; + default: + break; + } if (EXEC_BIOS_CMD_TABLE(SetPixelClock, clk)) result = BP_RESULT_OK; diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 6cf1a5a2a5ec..58eb0d69873a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -2679,7 +2679,8 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, - struct dc_stream_update *stream_update) + struct dc_stream_update *stream_update, + struct dc_state *state) { const struct dc_stream_status *stream_status; enum surface_update_type update_type; @@ -2698,12 +2699,6 @@ void dc_commit_updates_for_stream(struct dc *dc, if (update_type >= UPDATE_TYPE_FULL) { - struct dc_plane_state *new_planes[MAX_SURFACES]; - - memset(new_planes, 0, sizeof(new_planes)); - - for (i = 0; i < surface_count; i++) - new_planes[i] = srf_updates[i].surface; /* initialize scratch memory for building context */ context = dc_create_state(dc); @@ -2712,21 +2707,15 @@ void dc_commit_updates_for_stream(struct dc *dc, return; } - dc_resource_state_copy_construct( - dc->current_state, context); + dc_resource_state_copy_construct(state, context); - /*remove old surfaces from context */ - if (!dc_rem_all_planes_for_stream(dc, stream, context)) { - DC_ERROR("Failed to remove streams for new validate context!\n"); - return; - } + for (i = 0; i < dc->res_pool->pipe_count; i++) { + struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i]; + struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i]; - /* add surface to context */ - if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) { - DC_ERROR("Failed to add streams for new validate context!\n"); - return; + if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state) + new_pipe->plane_state->force_full_update = true; } - } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index f4a2088ab179..32cb5ce8bcd0 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -1470,6 +1470,11 @@ static bool dc_link_construct(struct dc_link *link, goto ddc_create_fail; } + if (!link->ddc->ddc_pin) { + DC_ERROR("Failed to get I2C info for connector!\n"); + goto ddc_create_fail; + } + link->ddc_hw_inst = dal_ddc_get_line(dal_ddc_service_get_ddc_pin(link->ddc)); @@ -2566,7 +2571,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link, if (pipe_ctx->plane_state == NULL) frame_ramp = 0; } else { - ASSERT(false); return false; } diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3aedadb34548..414b44b4ced4 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -889,6 +889,7 @@ struct dc_plane_state { int layer_index; union surface_update_flags update_flags; + bool flip_int_enabled; /* private to DC core */ struct dc_plane_status status; struct dc_context *ctx; diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h index e243c01b9672..b7910976b81a 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_stream.h +++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h @@ -283,7 +283,8 @@ void dc_commit_updates_for_stream(struct dc *dc, struct dc_surface_update *srf_updates, int surface_count, struct dc_stream_state *stream, - struct dc_stream_update *stream_update); + struct dc_stream_update *stream_update, + struct dc_state *state); /* * Log the current stream state. */ diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c index fb733f573715..466f8f5803c9 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clock_source.c @@ -871,6 +871,20 @@ static bool dce110_program_pix_clk( bp_pc_params.flags.SET_EXTERNAL_REF_DIV_SRC = pll_settings->use_external_clk; + switch (pix_clk_params->color_depth) { + case COLOR_DEPTH_101010: + bp_pc_params.color_depth = TRANSMITTER_COLOR_DEPTH_30; + break; + case COLOR_DEPTH_121212: + bp_pc_params.color_depth = TRANSMITTER_COLOR_DEPTH_36; + break; + case COLOR_DEPTH_161616: + bp_pc_params.color_depth = TRANSMITTER_COLOR_DEPTH_48; + break; + default: + break; + } + if (clk_src->bios->funcs->set_pixel_clock( clk_src->bios, &bp_pc_params) != BP_RESULT_OK) return false; diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c index ada57f745fd7..19e380e0a330 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_stream_encoder.c @@ -564,6 +564,7 @@ static void dce110_stream_encoder_hdmi_set_stream_attribute( cntl.enable_dp_audio = enable_audio; cntl.pixel_clock = actual_pix_clk_khz; cntl.lanes_number = LANE_COUNT_FOUR; + cntl.color_depth = crtc_timing->display_color_depth; if (enc110->base.bp->funcs->encoder_control( enc110->base.bp, &cntl) != BP_RESULT_OK) diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c index 130a0a0c8332..68028ec995e7 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dce_transform.c @@ -601,12 +601,12 @@ static void set_clamp( clamp_max = 0x3FC0; break; case COLOR_DEPTH_101010: - /* 10bit MSB aligned on 14 bit bus '11 1111 1111 1100' */ - clamp_max = 0x3FFC; + /* 10bit MSB aligned on 14 bit bus '11 1111 1111 0000' */ + clamp_max = 0x3FF0; break; case COLOR_DEPTH_121212: - /* 12bit MSB aligned on 14 bit bus '11 1111 1111 1111' */ - clamp_max = 0x3FFF; + /* 12bit MSB aligned on 14 bit bus '11 1111 1111 1100' */ + clamp_max = 0x3FFC; break; default: clamp_max = 0x3FC0; diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c index 9e796dfeac20..714c71a5fbde 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c @@ -1257,6 +1257,16 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset) REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0); } +void hubp1_set_flip_int(struct hubp *hubp) +{ + struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp); + + REG_UPDATE(DCSURF_SURFACE_FLIP_INTERRUPT, + SURFACE_FLIP_INT_MASK, 1); + + return; +} + void hubp1_init(struct hubp *hubp) { //do nothing @@ -1290,6 +1300,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = { .dmdata_load = NULL, .hubp_soft_reset = hubp1_soft_reset, .hubp_in_blank = hubp1_in_blank, + .hubp_set_flip_int = hubp1_set_flip_int, }; /*****************************************/ diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h index a9a6ed7f4f99..e2f2f6995935 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h @@ -74,6 +74,7 @@ SRI(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id),\ SRI(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id),\ SRI(DCSURF_SURFACE_CONTROL, HUBPREQ, id),\ + SRI(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id),\ SRI(HUBPRET_CONTROL, HUBPRET, id),\ SRI(DCN_EXPANSION_MODE, HUBPREQ, id),\ SRI(DCHUBP_REQ_SIZE_CONFIG, HUBP, id),\ @@ -183,6 +184,7 @@ uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; \ uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; \ uint32_t DCSURF_SURFACE_CONTROL; \ + uint32_t DCSURF_SURFACE_FLIP_INTERRUPT; \ uint32_t HUBPRET_CONTROL; \ uint32_t DCN_EXPANSION_MODE; \ uint32_t DCHUBP_REQ_SIZE_CONFIG; \ @@ -332,6 +334,7 @@ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_META_SURFACE_TMZ_C, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\ HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\ + HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\ HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\ @@ -531,6 +534,7 @@ type PRIMARY_SURFACE_DCC_IND_64B_BLK;\ type SECONDARY_SURFACE_DCC_EN;\ type SECONDARY_SURFACE_DCC_IND_64B_BLK;\ + type SURFACE_FLIP_INT_MASK;\ type DET_BUF_PLANE1_BASE_ADDRESS;\ type CROSSBAR_SRC_CB_B;\ type CROSSBAR_SRC_CR_R;\ @@ -777,4 +781,6 @@ void hubp1_read_state_common(struct hubp *hubp); bool hubp1_in_blank(struct hubp *hubp); void hubp1_soft_reset(struct hubp *hubp, bool reset); +void hubp1_set_flip_int(struct hubp *hubp); + #endif diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c index 017b67b830e6..3e86e042de0d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c @@ -2195,6 +2195,13 @@ static void dcn10_enable_plane( if (dc->debug.sanity_checks) { hws->funcs.verify_allow_pstate_change_high(dc); } + + if (!pipe_ctx->top_pipe + && pipe_ctx->plane_state + && pipe_ctx->plane_state->flip_int_enabled + && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int) + pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp); + } void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c index 81db0179f7ea..85dc2b16c941 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c @@ -480,7 +480,6 @@ unsigned int dcn10_get_dig_frontend(struct link_encoder *enc) break; default: // invalid source select DIG - ASSERT(false); result = ENGINE_ID_UNKNOWN; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c index 0df0da2e6a4d..bec7059f6d5d 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c @@ -1597,6 +1597,7 @@ static struct hubp_funcs dcn20_hubp_funcs = { .validate_dml_output = hubp2_validate_dml_output, .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, }; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c index 480d928cb1ca..077ba9cf69c5 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c @@ -1146,6 +1146,12 @@ void dcn20_enable_plane( pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt); } + if (!pipe_ctx->top_pipe + && pipe_ctx->plane_state + && pipe_ctx->plane_state->flip_int_enabled + && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int) + pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp); + // if (dc->debug.sanity_checks) { // dcn10_verify_allow_pstate_change_high(dc); // } @@ -1501,38 +1507,8 @@ static void dcn20_update_dchubp_dpp( if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed || pipe_ctx->stream->update_flags.bits.gamut_remap || pipe_ctx->stream->update_flags.bits.out_csc) { - struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc; - - if (mpc->funcs->set_gamut_remap) { - int i; - int mpcc_id = hubp->inst; - struct mpc_grph_gamut_adjustment adjust; - bool enable_remap_dpp = false; - - memset(&adjust, 0, sizeof(adjust)); - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS; - - /* save the enablement of gamut remap for dpp */ - enable_remap_dpp = pipe_ctx->stream->gamut_remap_matrix.enable_remap; - - /* force bypass gamut remap for dpp/cm */ - pipe_ctx->stream->gamut_remap_matrix.enable_remap = false; - dc->hwss.program_gamut_remap(pipe_ctx); - - /* restore gamut remap flag and use this remap into mpc */ - pipe_ctx->stream->gamut_remap_matrix.enable_remap = enable_remap_dpp; - - /* build remap matrix for top plane if enabled */ - if (enable_remap_dpp && pipe_ctx->top_pipe == NULL) { - adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW; - for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++) - adjust.temperature_matrix[i] = - pipe_ctx->stream->gamut_remap_matrix.matrix[i]; - } - mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust); - } else - /* dpp/cm gamut remap*/ - dc->hwss.program_gamut_remap(pipe_ctx); + /* dpp/cm gamut remap*/ + dc->hwss.program_gamut_remap(pipe_ctx); /*call the dcn2 method which uses mpc csc*/ dc->hwss.program_output_csc(dc, diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c index 15c2ff264ff6..1a347484cf2a 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c @@ -341,8 +341,7 @@ void enc2_hw_init(struct link_encoder *enc) } else { AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110); - AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c4d); - + AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a); } //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32; diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c index d6b488561871..354c2a2702d7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c @@ -408,8 +408,8 @@ static struct _vcs_dpi_soc_bounding_box_st dcn2_0_nv14_soc = { }, }, .num_states = 5, - .sr_exit_time_us = 11.6, - .sr_enter_plus_exit_time_us = 13.9, + .sr_exit_time_us = 8.6, + .sr_enter_plus_exit_time_us = 10.9, .urgent_latency_us = 4.0, .urgent_latency_pixel_data_only_us = 4.0, .urgent_latency_pixel_mixed_with_vm_data_us = 4.0, @@ -3245,7 +3245,7 @@ static noinline bool dcn20_validate_bandwidth_fp(struct dc *dc, bool dcn20_validate_bandwidth(struct dc *dc, struct dc_state *context, bool fast_validate) { - bool voltage_supported = false; + bool voltage_supported; DC_FP_START(); voltage_supported = dcn20_validate_bandwidth_fp(dc, context, fast_validate); DC_FP_END(); diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c index f9045852728f..b0c9180b808f 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c @@ -838,6 +838,7 @@ static struct hubp_funcs dcn21_hubp_funcs = { .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl, .hubp_init = hubp21_init, .validate_dml_output = hubp21_validate_dml_output, + .hubp_set_flip_int = hubp1_set_flip_int, }; bool hubp21_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c index 674376428916..4a3df13c9e49 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c @@ -296,7 +296,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = { .num_banks = 8, .num_chans = 4, .vmm_page_size_bytes = 4096, - .dram_clock_change_latency_us = 11.72, + .dram_clock_change_latency_us = 23.84, .return_bus_width_bytes = 64, .dispclk_dppclk_vco_speed_mhz = 3600, .xfc_bus_transport_time_us = 4, @@ -1062,8 +1062,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s { int i; - DC_FP_START(); - if (dc->bb_overrides.sr_exit_time_ns) { for (i = 0; i < WM_SET_COUNT; i++) { dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us = @@ -1088,8 +1086,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s dc->bb_overrides.dram_clock_change_latency_ns / 1000.0; } } - - DC_FP_END(); } void dcn21_calculate_wm( @@ -1329,8 +1325,8 @@ static bool dcn21_fast_validate_bw( return out; } -bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, - bool fast_validate) +static noinline bool dcn21_validate_bandwidth_fp(struct dc *dc, + struct dc_state *context, bool fast_validate) { bool out = false; @@ -1339,7 +1335,7 @@ bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, int vlevel = 0; int pipe_split_from[MAX_PIPES]; int pipe_cnt = 0; - display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL); + display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC); DC_LOGGER_INIT(dc->ctx->logger); BW_VAL_TRACE_COUNT(); @@ -1383,6 +1379,22 @@ bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, return out; } + +/* + * Some of the functions further below use the FPU, so we need to wrap this + * with DC_FP_START()/DC_FP_END(). Use the same approach as for + * dcn20_validate_bandwidth in dcn20_resource.c. + */ +bool dcn21_validate_bandwidth(struct dc *dc, struct dc_state *context, + bool fast_validate) +{ + bool voltage_supported; + DC_FP_START(); + voltage_supported = dcn21_validate_bandwidth_fp(dc, context, fast_validate); + DC_FP_END(); + return voltage_supported; +} + static void dcn21_destroy_resource_pool(struct resource_pool **pool) { struct dcn21_resource_pool *dcn21_pool = TO_DCN21_RES_POOL(*pool); @@ -1583,6 +1595,11 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param dcn2_1_soc.num_chans = bw_params->num_channels; ASSERT(clk_table->num_entries); + /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */ + for (i = 0; i < dcn2_1_soc.num_states + 1; i++) { + clock_limits[i] = dcn2_1_soc.clock_limits[i]; + } + for (i = 0; i < clk_table->num_entries; i++) { /* loop backwards*/ for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c index 41a1d0e9b7e2..e0df9b0065f9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c @@ -113,6 +113,7 @@ bool cm3_helper_translate_curve_to_hw_format( struct pwl_result_data *rgb_resulted; struct pwl_result_data *rgb; struct pwl_result_data *rgb_plus_1; + struct pwl_result_data *rgb_minus_1; struct fixed31_32 end_value; int32_t region_start, region_end; @@ -140,7 +141,7 @@ bool cm3_helper_translate_curve_to_hw_format( region_start = -MAX_LOW_POINT; region_end = NUMBER_REGIONS - MAX_LOW_POINT; } else { - /* 10 segments + /* 11 segments * segment is from 2^-10 to 2^0 * There are less than 256 points, for optimization */ @@ -154,9 +155,10 @@ bool cm3_helper_translate_curve_to_hw_format( seg_distr[7] = 4; seg_distr[8] = 4; seg_distr[9] = 4; + seg_distr[10] = 1; region_start = -10; - region_end = 0; + region_end = 1; } for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++) @@ -189,6 +191,10 @@ bool cm3_helper_translate_curve_to_hw_format( rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index]; rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index]; + rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red; + rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green; + rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue; + // All 3 color channels have same x corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2), dc_fixpt_from_int(region_start)); @@ -259,15 +265,18 @@ bool cm3_helper_translate_curve_to_hw_format( rgb = rgb_resulted; rgb_plus_1 = rgb_resulted + 1; + rgb_minus_1 = rgb; i = 1; while (i != hw_points + 1) { - if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) - rgb_plus_1->red = rgb->red; - if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) - rgb_plus_1->green = rgb->green; - if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) - rgb_plus_1->blue = rgb->blue; + if (i >= hw_points - 1) { + if (dc_fixpt_lt(rgb_plus_1->red, rgb->red)) + rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red); + if (dc_fixpt_lt(rgb_plus_1->green, rgb->green)) + rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green); + if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue)) + rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue); + } rgb->delta_red = dc_fixpt_sub(rgb_plus_1->red, rgb->red); rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green); @@ -283,6 +292,7 @@ bool cm3_helper_translate_curve_to_hw_format( } ++rgb_plus_1; + rgb_minus_1 = rgb; ++rgb; ++i; } diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c index 88ffa9ff1ed1..f24612523248 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c @@ -511,6 +511,7 @@ static struct hubp_funcs dcn30_hubp_funcs = { .hubp_init = hubp3_init, .hubp_in_blank = hubp1_in_blank, .hubp_soft_reset = hubp1_soft_reset, + .hubp_set_flip_int = hubp1_set_flip_int, }; bool hubp3_construct( diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 3deb3fb1724d..0631c16f9aff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -539,6 +539,8 @@ void dcn30_init_hw(struct dc *dc) fe = dc->links[i]->link_enc->funcs->get_dig_frontend( dc->links[i]->link_enc); + if (fe == ENGINE_ID_UNKNOWN) + continue; for (j = 0; j < dc->res_pool->stream_enc_count; j++) { if (fe == dc->res_pool->stream_enc[j]->id) { diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 5e126fdf6ec1..7ec8936346b2 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2601,6 +2601,19 @@ static const struct resource_funcs dcn30_res_pool_funcs = { .patch_unknown_plane_state = dcn20_patch_unknown_plane_state, }; +#define CTX ctx + +#define REG(reg_name) \ + (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name) + +static uint32_t read_pipe_fuses(struct dc_context *ctx) +{ + uint32_t value = REG_READ(CC_DC_PIPE_DIS); + /* Support for max 6 pipes */ + value = value & 0x3f; + return value; +} + static bool dcn30_resource_construct( uint8_t num_virtual_links, struct dc *dc, @@ -2610,6 +2623,15 @@ static bool dcn30_resource_construct( struct dc_context *ctx = dc->ctx; struct irq_service_init_data init_data; struct ddc_service_init_data ddc_init_data; + uint32_t pipe_fuses = read_pipe_fuses(ctx); + uint32_t num_pipes = 0; + + if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) { + BREAK_TO_DEBUGGER(); + dm_error("DC: Unexpected fuse recipe for navi2x !\n"); + /* fault to single pipe */ + pipe_fuses = 0x3e; + } DC_FP_START(); @@ -2739,6 +2761,15 @@ static bool dcn30_resource_construct( /* PP Lib and SMU interfaces */ init_soc_bounding_box(dc, pool); + num_pipes = dcn3_0_ip.max_num_dpp; + + for (i = 0; i < dcn3_0_ip.max_num_dpp; i++) + if (pipe_fuses & 1 << i) + num_pipes--; + + dcn3_0_ip.max_num_dpp = num_pipes; + dcn3_0_ip.max_num_otg = num_pipes; + dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30); /* IRQ */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c index 35f5bf08ae96..23bc208cbfa4 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c @@ -1722,12 +1722,106 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30); } +static void calculate_wm_set_for_vlevel( + int vlevel, + struct wm_range_table_entry *table_entry, + struct dcn_watermarks *wm_set, + struct display_mode_lib *dml, + display_e2e_pipe_params_st *pipes, + int pipe_cnt) +{ + double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us; + + ASSERT(vlevel < dml->soc.num_states); + /* only pipe 0 is read for voltage and dcf/soc clocks */ + pipes[0].clks_cfg.voltage = vlevel; + pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz; + pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz; + + dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us; + dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us; + dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us; + + wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000; + wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000; + wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000; + wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000; + wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000; + dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached; + +} + +static void dcn301_calculate_wm_and_dlg( + struct dc *dc, struct dc_state *context, + display_e2e_pipe_params_st *pipes, + int pipe_cnt, + int vlevel_req) +{ + int i, pipe_idx; + int vlevel, vlevel_max; + struct wm_range_table_entry *table_entry; + struct clk_bw_params *bw_params = dc->clk_mgr->bw_params; + + ASSERT(bw_params); + + vlevel_max = bw_params->clk_table.num_entries - 1; + + /* WM Set D */ + table_entry = &bw_params->wm_table.entries[WM_D]; + if (table_entry->wm_type == WM_TYPE_RETRAINING) + vlevel = 0; + else + vlevel = vlevel_max; + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set C */ + table_entry = &bw_params->wm_table.entries[WM_C]; + vlevel = min(max(vlevel_req, 2), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c, + &context->bw_ctx.dml, pipes, pipe_cnt); + /* WM Set B */ + table_entry = &bw_params->wm_table.entries[WM_B]; + vlevel = min(max(vlevel_req, 1), vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b, + &context->bw_ctx.dml, pipes, pipe_cnt); + + /* WM Set A */ + table_entry = &bw_params->wm_table.entries[WM_A]; + vlevel = min(vlevel_req, vlevel_max); + calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a, + &context->bw_ctx.dml, pipes, pipe_cnt); + + for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) { + if (!context->res_ctx.pipe_ctx[i].stream) + continue; + + pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt); + pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx); + + if (dc->config.forced_clocks) { + pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz; + pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz; + } + if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0; + if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000) + pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0; + + pipe_idx++; + } + + dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel); +} + static struct resource_funcs dcn301_res_pool_funcs = { .destroy = dcn301_destroy_resource_pool, .link_enc_create = dcn301_link_encoder_create, .panel_cntl_create = dcn301_panel_cntl_create, .validate_bandwidth = dcn30_validate_bandwidth, - .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg, + .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg, .populate_dml_pipes = dcn30_populate_dml_pipes_from_context, .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer, .add_stream_to_ctx = dcn30_add_stream_to_ctx, diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h index 22f3f643ed1b..346dcd87dc10 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h @@ -191,6 +191,8 @@ struct hubp_funcs { bool (*hubp_in_blank)(struct hubp *hubp); void (*hubp_soft_reset)(struct hubp *hubp, bool reset); + void (*hubp_set_flip_int)(struct hubp *hubp); + }; #endif diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c index 1b971265418b..0e0f494fbb5e 100644 --- a/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c +++ b/drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c @@ -168,6 +168,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .ack = NULL }; +static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = { + .set = NULL, + .ack = NULL +}; + #undef BASE_INNER #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg @@ -230,6 +235,17 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = { .funcs = &vblank_irq_info_funcs\ } +/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match semantic + * of DCE's DC_IRQ_SOURCE_VUPDATEx. + */ +#define vupdate_no_lock_int_entry(reg_num)\ + [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\ + IRQ_REG_ENTRY(OTG, reg_num,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\ + OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\ + .funcs = &vupdate_no_lock_irq_info_funcs\ + } + #define vblank_int_entry(reg_num)\ [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\ IRQ_REG_ENTRY(OTG, reg_num,\ @@ -338,6 +354,12 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = { vupdate_int_entry(3), vupdate_int_entry(4), vupdate_int_entry(5), + vupdate_no_lock_int_entry(0), + vupdate_no_lock_int_entry(1), + vupdate_no_lock_int_entry(2), + vupdate_no_lock_int_entry(3), + vupdate_no_lock_int_entry(4), + vupdate_no_lock_int_entry(5), vblank_int_entry(0), vblank_int_entry(1), vblank_int_entry(2), diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 7b6ef05a1d35..0b5be50b2eee 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -1074,7 +1074,7 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev, static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask) { int ret; - long level; + unsigned long level; char *sub_str = NULL; char *tmp; char buf_cpy[AMDGPU_MASK_BUF_MAX + 1]; @@ -1090,8 +1090,8 @@ static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask) while (tmp[0]) { sub_str = strsep(&tmp, delimiter); if (strlen(sub_str)) { - ret = kstrtol(sub_str, 0, &level); - if (ret) + ret = kstrtoul(sub_str, 0, &level); + if (ret || level > 31) return -EINVAL; *mask |= 1 << level; } else diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c index 82676c086ce4..72cb67d50e4a 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c @@ -587,6 +587,48 @@ static int smu7_force_switch_to_arbf0(struct pp_hwmgr *hwmgr) tmp, MC_CG_ARB_FREQ_F0); } +static uint16_t smu7_override_pcie_speed(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint16_t pcie_gen = 0; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 && + adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 && + adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 && + adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 && + adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + return pcie_gen; +} + +static uint16_t smu7_override_pcie_width(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + uint16_t pcie_width = 0; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 16; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 12; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 8; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + return pcie_width; +} + static int smu7_setup_default_pcie_table(struct pp_hwmgr *hwmgr) { struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend); @@ -683,6 +725,11 @@ static int smu7_setup_default_pcie_table(struct pp_hwmgr *hwmgr) PP_Min_PCIEGen), get_pcie_lane_support(data->pcie_lane_cap, PP_Max_PCIELane)); + + if (data->pcie_dpm_key_disabled) + phm_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table, + data->dpm_table.pcie_speed_table.count, + smu7_override_pcie_speed(hwmgr), smu7_override_pcie_width(hwmgr)); } return 0; } @@ -1248,6 +1295,13 @@ static int smu7_start_dpm(struct pp_hwmgr *hwmgr) NULL)), "Failed to enable pcie DPM during DPM Start Function!", return -EINVAL); + } else { + PP_ASSERT_WITH_CODE( + (0 == smum_send_msg_to_smc(hwmgr, + PPSMC_MSG_PCIeDPM_Disable, + NULL)), + "Failed to disble pcie DPM during DPM Start Function!", + return -EINVAL); } if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, @@ -5216,10 +5270,10 @@ static int smu7_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr, for (j = 0; j < dep_sclk_table->count; j++) { valid_entry = false; for (k = 0; k < watermarks->num_wm_sets; k++) { - if (dep_sclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz && - dep_sclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz && - dep_mclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz && - dep_mclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz) { + if (dep_sclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz / 10 && + dep_sclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz / 10 && + dep_mclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz / 10 && + dep_mclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz / 10) { valid_entry = true; table->DisplayWatermark[i][j] = watermarks->wm_clk_ranges[k].wm_set_id; break; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c index 1b47f94e0331..892f08f2ba42 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c @@ -54,6 +54,9 @@ #include "smuio/smuio_9_0_offset.h" #include "smuio/smuio_9_0_sh_mask.h" +#define smnPCIE_LC_SPEED_CNTL 0x11140290 +#define smnPCIE_LC_LINK_WIDTH_CNTL 0x11140288 + #define HBM_MEMORY_CHANNEL_WIDTH 128 static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2}; @@ -443,8 +446,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr) if (PP_CAP(PHM_PlatformCaps_VCEDPM)) data->smu_features[GNLD_DPM_VCE].supported = true; - if (!data->registry_data.pcie_dpm_key_disabled) - data->smu_features[GNLD_DPM_LINK].supported = true; + data->smu_features[GNLD_DPM_LINK].supported = true; if (!data->registry_data.dcefclk_dpm_key_disabled) data->smu_features[GNLD_DPM_DCEFCLK].supported = true; @@ -1506,6 +1508,55 @@ static int vega10_populate_single_lclk_level(struct pp_hwmgr *hwmgr, return 0; } +static int vega10_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + struct vega10_hwmgr *data = + (struct vega10_hwmgr *)(hwmgr->backend); + uint32_t pcie_gen = 0, pcie_width = 0; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + for (i = 0; i < NUM_LINK_LEVELS; i++) { + if (pp_table->PcieGenSpeed[i] > pcie_gen) + pp_table->PcieGenSpeed[i] = pcie_gen; + + if (pp_table->PcieLaneCount[i] > pcie_width) + pp_table->PcieLaneCount[i] = pcie_width; + } + + if (data->registry_data.pcie_dpm_key_disabled) { + for (i = 0; i < NUM_LINK_LEVELS; i++) { + pp_table->PcieGenSpeed[i] = pcie_gen; + pp_table->PcieLaneCount[i] = pcie_width; + } + } + + return 0; +} + static int vega10_populate_smc_link_levels(struct pp_hwmgr *hwmgr) { int result = -1; @@ -2557,6 +2608,11 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr) "Failed to initialize Link Level!", return result); + result = vega10_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "Failed to override pcie parameters!", + return result); + result = vega10_populate_all_graphic_levels(hwmgr); PP_ASSERT_WITH_CODE(!result, "Failed to initialize Graphics Level!", @@ -2920,9 +2976,18 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap) } } + if (data->registry_data.pcie_dpm_key_disabled) { + PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr, + false, data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap), + "Attempt to Disable Link DPM feature Failed!", return -EINVAL); + data->smu_features[GNLD_DPM_LINK].enabled = false; + data->smu_features[GNLD_DPM_LINK].supported = false; + } + return 0; } + static int vega10_enable_disable_PCC_limit_feature(struct pp_hwmgr *hwmgr, bool enable) { struct vega10_hwmgr *data = hwmgr->backend; @@ -4537,6 +4602,24 @@ static int vega10_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe return 0; } +static int vega10_get_current_pcie_link_width_level(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + return (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) & + PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK) + >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT; +} + +static int vega10_get_current_pcie_link_speed_level(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = hwmgr->adev; + + return (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) & + PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK) + >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT; +} + static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf) { @@ -4545,8 +4628,9 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table); struct vega10_single_dpm_table *soc_table = &(data->dpm_table.soc_table); struct vega10_single_dpm_table *dcef_table = &(data->dpm_table.dcef_table); - struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table); struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL; + uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width; + PPTable_t *pptable = &(data->smc_state_table.pp_table); int i, now, size = 0, count = 0; @@ -4603,15 +4687,31 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr, "*" : ""); break; case PP_PCIE: - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentLinkIndex, &now); - - for (i = 0; i < pcie_table->count; i++) - size += sprintf(buf + size, "%d: %s %s\n", i, - (pcie_table->pcie_gen[i] == 0) ? "2.5GT/s, x1" : - (pcie_table->pcie_gen[i] == 1) ? "5.0GT/s, x16" : - (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "", - (i == now) ? "*" : ""); + current_gen_speed = + vega10_get_current_pcie_link_speed_level(hwmgr); + current_lane_width = + vega10_get_current_pcie_link_width_level(hwmgr); + for (i = 0; i < NUM_LINK_LEVELS; i++) { + gen_speed = pptable->PcieGenSpeed[i]; + lane_width = pptable->PcieLaneCount[i]; + + size += sprintf(buf + size, "%d: %s %s %s\n", i, + (gen_speed == 0) ? "2.5GT/s," : + (gen_speed == 1) ? "5.0GT/s," : + (gen_speed == 2) ? "8.0GT/s," : + (gen_speed == 3) ? "16.0GT/s," : "", + (lane_width == 1) ? "x1" : + (lane_width == 2) ? "x2" : + (lane_width == 3) ? "x4" : + (lane_width == 4) ? "x8" : + (lane_width == 5) ? "x12" : + (lane_width == 6) ? "x16" : "", + (current_gen_speed == gen_speed) && + (current_lane_width == lane_width) ? + "*" : ""); + } break; + case OD_SCLK: if (hwmgr->od_enabled) { size = sprintf(buf, "%s:\n", "OD_SCLK"); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c index dc206fa88c5e..e68651fb7ca4 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c @@ -133,6 +133,7 @@ static void vega12_set_default_registry_data(struct pp_hwmgr *hwmgr) data->registry_data.auto_wattman_debug = 0; data->registry_data.auto_wattman_sample_period = 100; data->registry_data.auto_wattman_threshold = 50; + data->registry_data.pcie_dpm_key_disabled = !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); } static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr) @@ -481,6 +482,90 @@ static void vega12_init_dpm_state(struct vega12_dpm_state *dpm_state) dpm_state->hard_max_level = 0xffff; } +static int vega12_override_pcie_parameters(struct pp_hwmgr *hwmgr) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); + struct vega12_hwmgr *data = + (struct vega12_hwmgr *)(hwmgr->backend); + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; + int ret; + + if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) + pcie_gen = 3; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) + pcie_gen = 2; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) + pcie_gen = 1; + else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1) + pcie_gen = 0; + + if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16) + pcie_width = 6; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12) + pcie_width = 5; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8) + pcie_width = 4; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4) + pcie_width = 3; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2) + pcie_width = 2; + else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1) + pcie_width = 1; + + /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1 + * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 + * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 + */ + for (i = 0; i < NUM_LINK_LEVELS; i++) { + pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen : + pp_table->PcieGenSpeed[i]; + pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width : + pp_table->PcieLaneCount[i]; + + if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg != + pp_table->PcieLaneCount[i]) { + smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + } + + /* update the pptable */ + pp_table->PcieGenSpeed[i] = pcie_gen_arg; + pp_table->PcieLaneCount[i] = pcie_width_arg; + } + + /* override to the highest if it's disabled from ppfeaturmask */ + if (data->registry_data.pcie_dpm_key_disabled) { + for (i = 0; i < NUM_LINK_LEVELS; i++) { + smu_pcie_arg = (i << 16) | (pcie_gen << 8) | pcie_width; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + + pp_table->PcieGenSpeed[i] = pcie_gen; + pp_table->PcieLaneCount[i] = pcie_width; + } + ret = vega12_enable_smc_features(hwmgr, + false, + data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to Disable DPM LINK Failed!", + return ret); + data->smu_features[GNLD_DPM_LINK].enabled = false; + data->smu_features[GNLD_DPM_LINK].supported = false; + } + return 0; +} + static int vega12_get_number_of_dpm_level(struct pp_hwmgr *hwmgr, PPCLK_e clk_id, uint32_t *num_of_levels) { @@ -969,6 +1054,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr) "Failed to enable all smu features!", return result); + result = vega12_override_pcie_parameters(hwmgr); + PP_ASSERT_WITH_CODE(!result, + "[EnableDPMTasks] Failed to override pcie parameters!", + return result); + tmp_result = vega12_power_control_set_level(hwmgr); PP_ASSERT_WITH_CODE(!tmp_result, "Failed to power control set level!", diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c index da84012b7fd5..60cde0c52825 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c @@ -171,6 +171,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr) data->registry_data.gfxoff_controlled_by_driver = 1; data->gfxoff_allowed = false; data->counter_gfxoff = 0; + data->registry_data.pcie_dpm_key_disabled = !(hwmgr->feature_mask & PP_PCIE_DPM_MASK); } static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr) @@ -832,7 +833,9 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev); struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend); - uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg; + uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg; + PPTable_t *pp_table = &(data->smc_state_table.pp_table); + int i; int ret; if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4) @@ -861,17 +864,51 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr) * Bit 15:8: PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4 * Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */ - smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width; - ret = smum_send_msg_to_smc_with_parameter(hwmgr, - PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, - NULL); - PP_ASSERT_WITH_CODE(!ret, - "[OverridePcieParameters] Attempt to override pcie params failed!", - return ret); + for (i = 0; i < NUM_LINK_LEVELS; i++) { + pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen : + pp_table->PcieGenSpeed[i]; + pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width : + pp_table->PcieLaneCount[i]; + + if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg != + pp_table->PcieLaneCount[i]) { + smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); + } + + /* update the pptable */ + pp_table->PcieGenSpeed[i] = pcie_gen_arg; + pp_table->PcieLaneCount[i] = pcie_width_arg; + } + + /* override to the highest if it's disabled from ppfeaturmask */ + if (data->registry_data.pcie_dpm_key_disabled) { + for (i = 0; i < NUM_LINK_LEVELS; i++) { + smu_pcie_arg = (i << 16) | (pcie_gen << 8) | pcie_width; + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_OverridePcieParameters, smu_pcie_arg, + NULL); + PP_ASSERT_WITH_CODE(!ret, + "[OverridePcieParameters] Attempt to override pcie params failed!", + return ret); - data->pcie_parameters_override = true; - data->pcie_gen_level1 = pcie_gen; - data->pcie_width_level1 = pcie_width; + pp_table->PcieGenSpeed[i] = pcie_gen; + pp_table->PcieLaneCount[i] = pcie_width; + } + ret = vega20_enable_smc_features(hwmgr, + false, + data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap); + PP_ASSERT_WITH_CODE(!ret, + "Attempt to Disable DPM LINK Failed!", + return ret); + data->smu_features[GNLD_DPM_LINK].enabled = false; + data->smu_features[GNLD_DPM_LINK].supported = false; + } return 0; } @@ -3320,9 +3357,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, data->od8_settings.od8_settings_array; OverDriveTable_t *od_table = &(data->smc_state_table.overdrive_table); - struct phm_ppt_v3_information *pptable_information = - (struct phm_ppt_v3_information *)hwmgr->pptable; - PPTable_t *pptable = (PPTable_t *)pptable_information->smc_pptable; + PPTable_t *pptable = &(data->smc_state_table.pp_table); struct pp_clock_levels_with_latency clocks; struct vega20_single_dpm_table *fclk_dpm_table = &(data->dpm_table.fclk_table); @@ -3421,13 +3456,9 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr, current_lane_width = vega20_get_current_pcie_link_width_level(hwmgr); for (i = 0; i < NUM_LINK_LEVELS; i++) { - if (i == 1 && data->pcie_parameters_override) { - gen_speed = data->pcie_gen_level1; - lane_width = data->pcie_width_level1; - } else { - gen_speed = pptable->PcieGenSpeed[i]; - lane_width = pptable->PcieLaneCount[i]; - } + gen_speed = pptable->PcieGenSpeed[i]; + lane_width = pptable->PcieLaneCount[i]; + size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i, (gen_speed == 0) ? "2.5GT/s," : (gen_speed == 1) ? "5.0GT/s," : diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c index 5aeb5f5a0447..9be8e1888daf 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c @@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin"); #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE +#define mmTHM_BACO_CNTL_ARCT 0xA7 +#define mmTHM_BACO_CNTL_ARCT_BASE_IDX 0 + static int link_width[] = {0, 1, 2, 4, 8, 12, 16}; static int link_speed[] = {25, 50, 80, 160}; @@ -1581,9 +1584,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state) break; default: if (!ras || !ras->supported) { - data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); - data |= 0x80000000; - WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + if (adev->asic_type == CHIP_ARCTURUS) { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data); + } else { + data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL); + data |= 0x80000000; + WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data); + } ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL); } else { diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index b11c0522a441..405501c74e40 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -2302,7 +2302,8 @@ drm_dp_mst_port_add_connector(struct drm_dp_mst_branch *mstb, } if (port->pdt != DP_PEER_DEVICE_NONE && - drm_dp_mst_is_end_device(port->pdt, port->mcs)) { + drm_dp_mst_is_end_device(port->pdt, port->mcs) && + port->port_num >= DP_MST_LOGICAL_PORT_0) { port->cached_edid = drm_get_edid(port->connector, &port->aux.ddc); drm_connector_set_tile_property(port->connector); diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 4b8119510687..080fd437fd43 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -946,11 +946,15 @@ static int setcmap_legacy(struct fb_cmap *cmap, struct fb_info *info) drm_modeset_lock_all(fb_helper->dev); drm_client_for_each_modeset(modeset, &fb_helper->client) { crtc = modeset->crtc; - if (!crtc->funcs->gamma_set || !crtc->gamma_size) - return -EINVAL; + if (!crtc->funcs->gamma_set || !crtc->gamma_size) { + ret = -EINVAL; + goto out; + } - if (cmap->start + cmap->len > crtc->gamma_size) - return -EINVAL; + if (cmap->start + cmap->len > crtc->gamma_size) { + ret = -EINVAL; + goto out; + } r = crtc->gamma_store; g = r + crtc->gamma_size; @@ -963,8 +967,9 @@ static int setcmap_legacy(struct fb_cmap *cmap, struct fb_info *info) ret = crtc->funcs->gamma_set(crtc, r, g, b, crtc->gamma_size, NULL); if (ret) - return ret; + goto out; } +out: drm_modeset_unlock_all(fb_helper->dev); return ret; @@ -2038,7 +2043,7 @@ static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper) if (shadow) vfree(shadow); - else + else if (fb_helper->buffer) drm_client_buffer_vunmap(fb_helper->buffer); drm_client_framebuffer_delete(fb_helper->buffer); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index 9825c378dfa6..6d625cee7a6a 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -357,13 +357,14 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem, if (--shmem->vmap_use_count > 0) return; - if (obj->import_attach) + if (obj->import_attach) { dma_buf_vunmap(obj->import_attach->dmabuf, map); - else + } else { vunmap(shmem->vaddr); + drm_gem_shmem_put_pages(shmem); + } shmem->vaddr = NULL; - drm_gem_shmem_put_pages(shmem); } /* @@ -525,14 +526,28 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); loff_t num_pages = obj->size >> PAGE_SHIFT; + vm_fault_t ret; struct page *page; + pgoff_t page_offset; - if (vmf->pgoff >= num_pages || WARN_ON_ONCE(!shmem->pages)) - return VM_FAULT_SIGBUS; + /* We don't use vmf->pgoff since that has the fake offset */ + page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; - page = shmem->pages[vmf->pgoff]; + mutex_lock(&shmem->pages_lock); - return vmf_insert_page(vma, vmf->address, page); + if (page_offset >= num_pages || + WARN_ON_ONCE(!shmem->pages) || + shmem->madv < 0) { + ret = VM_FAULT_SIGBUS; + } else { + page = shmem->pages[page_offset]; + + ret = vmf_insert_page(vma, vmf->address, page); + } + + mutex_unlock(&shmem->pages_lock); + + return ret; } static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) @@ -581,9 +596,6 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) struct drm_gem_shmem_object *shmem; int ret; - /* Remove the fake offset */ - vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node); - if (obj->import_attach) { /* Drop the reference drm_gem_mmap_obj() acquired.*/ drm_gem_object_put(obj); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index f86448ab1fe0..dc734d4828a1 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -99,6 +99,8 @@ static int compat_drm_version(struct file *file, unsigned int cmd, if (copy_from_user(&v32, (void __user *)arg, sizeof(v32))) return -EFAULT; + memset(&v, 0, sizeof(v)); + v = (struct drm_version) { .name_len = v32.name_len, .name = compat_ptr(v32.name), @@ -137,6 +139,9 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd, if (copy_from_user(&uq32, (void __user *)arg, sizeof(uq32))) return -EFAULT; + + memset(&uq, 0, sizeof(uq)); + uq = (struct drm_unique){ .unique_len = uq32.unique_len, .unique = compat_ptr(uq32.unique), @@ -265,6 +270,8 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd, if (copy_from_user(&c32, argp, sizeof(c32))) return -EFAULT; + memset(&client, 0, sizeof(client)); + client.idx = c32.idx; err = drm_ioctl_kernel(file, drm_getclient, &client, 0); @@ -852,6 +859,8 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd, if (copy_from_user(&req32, argp, sizeof(req32))) return -EFAULT; + memset(&req, 0, sizeof(req)); + req.request.type = req32.request.type; req.request.sequence = req32.request.sequence; req.request.signal = req32.request.signal; @@ -889,6 +898,8 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd, struct drm_mode_fb_cmd2 req64; int err; + memset(&req64, 0, sizeof(req64)); + if (copy_from_user(&req64, argp, offsetof(drm_mode_fb_cmd232_t, modifier))) return -EFAULT; diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 33fb2f05ce66..1ac67d4505e0 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -762,7 +762,7 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->htotal == 0 || mode->vtotal == 0) return 0; - num = mode->clock * 1000; + num = mode->clock; den = mode->htotal * mode->vtotal; if (mode->flags & DRM_MODE_FLAG_INTERLACE) @@ -772,7 +772,7 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode) if (mode->vscan > 1) den *= mode->vscan; - return DIV_ROUND_CLOSEST(num, den); + return DIV_ROUND_CLOSEST_ULL(mul_u32_u32(num, 1000), den); } EXPORT_SYMBOL(drm_mode_vrefresh); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c index 6d38c5c17f23..a9e696d05b33 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c @@ -689,7 +689,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj) struct page **pages = pvec + pinned; ret = pin_user_pages_fast(ptr, num_pages, - !userptr->ro ? FOLL_WRITE : 0, pages); + FOLL_WRITE | FOLL_FORCE, pages); if (ret < 0) { unpin_user_pages(pvec, pinned); kvfree(pvec); diff --git a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c index e28107061148..fc9a34ed58bd 100644 --- a/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c +++ b/drivers/gpu/drm/gma500/oaktrail_hdmi_i2c.c @@ -279,11 +279,8 @@ int oaktrail_hdmi_i2c_init(struct pci_dev *dev) hdmi_dev = pci_get_drvdata(dev); i2c_dev = kzalloc(sizeof(struct hdmi_i2c_dev), GFP_KERNEL); - if (i2c_dev == NULL) { - DRM_ERROR("Can't allocate interface\n"); - ret = -ENOMEM; - goto exit; - } + if (!i2c_dev) + return -ENOMEM; i2c_dev->adap = &oaktrail_hdmi_i2c_adapter; i2c_dev->status = I2C_STAT_INIT; @@ -300,16 +297,23 @@ int oaktrail_hdmi_i2c_init(struct pci_dev *dev) oaktrail_hdmi_i2c_adapter.name, hdmi_dev); if (ret) { DRM_ERROR("Failed to request IRQ for I2C controller\n"); - goto err; + goto free_dev; } /* Adapter registration */ ret = i2c_add_numbered_adapter(&oaktrail_hdmi_i2c_adapter); - return ret; + if (ret) { + DRM_ERROR("Failed to add I2C adapter\n"); + goto free_irq; + } -err: + return 0; + +free_irq: + free_irq(dev->irq, hdmi_dev); +free_dev: kfree(i2c_dev); -exit: + return ret; } diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c index cc2d59e8471d..134068f9328d 100644 --- a/drivers/gpu/drm/gma500/psb_drv.c +++ b/drivers/gpu/drm/gma500/psb_drv.c @@ -312,6 +312,8 @@ static int psb_driver_load(struct drm_device *dev, unsigned long flags) if (ret) goto out_err; + ret = -ENOMEM; + dev_priv->mmu = psb_mmu_driver_init(dev, 1, 0, 0); if (!dev_priv->mmu) goto out_err; diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c index d845657fd99c..426f5fb20fad 100644 --- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c +++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c @@ -366,7 +366,6 @@ static void hibmc_pci_remove(struct pci_dev *pdev) drm_dev_unregister(dev); hibmc_unload(dev); - drm_dev_put(dev); } static const struct pci_device_id hibmc_pci_table[] = { diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c index 82674a8853c6..2fa9ba36eeaa 100644 --- a/drivers/gpu/drm/i915/display/intel_hdmi.c +++ b/drivers/gpu/drm/i915/display/intel_hdmi.c @@ -2216,7 +2216,11 @@ hdmi_port_clock_valid(struct intel_hdmi *hdmi, has_hdmi_sink)) return MODE_CLOCK_HIGH; - /* BXT DPLL can't generate 223-240 MHz */ + /* GLK DPLL can't generate 446-480 MHz */ + if (IS_GEMINILAKE(dev_priv) && clock > 446666 && clock < 480000) + return MODE_CLOCK_RANGE; + + /* BXT/GLK DPLL can't generate 223-240 MHz */ if (IS_GEN9_LP(dev_priv) && clock > 223333 && clock < 240000) return MODE_CLOCK_RANGE; diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c index e2716a67b281..d017d341c593 100644 --- a/drivers/gpu/drm/i915/display/intel_vdsc.c +++ b/drivers/gpu/drm/i915/display/intel_vdsc.c @@ -1016,20 +1016,14 @@ static i915_reg_t dss_ctl1_reg(const struct intel_crtc_state *crtc_state) { enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; - if (crtc_state->cpu_transcoder == TRANSCODER_EDP) - return DSS_CTL1; - - return ICL_PIPE_DSS_CTL1(pipe); + return is_pipe_dsc(crtc_state) ? ICL_PIPE_DSS_CTL1(pipe) : DSS_CTL1; } static i915_reg_t dss_ctl2_reg(const struct intel_crtc_state *crtc_state) { enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe; - if (crtc_state->cpu_transcoder == TRANSCODER_EDP) - return DSS_CTL2; - - return ICL_PIPE_DSS_CTL2(pipe); + return is_pipe_dsc(crtc_state) ? ICL_PIPE_DSS_CTL2(pipe) : DSS_CTL2; } void intel_dsc_enable(struct intel_encoder *encoder, diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index e961ad6a3129..4adbc2bba97f 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -240,7 +240,7 @@ gen7_emit_state_base_address(struct batch_chunk *batch, /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ - *cs++ = batch_addr(batch) | surface_state_base | BASE_ADDRESS_MODIFY; + *cs++ = (batch_addr(batch) + surface_state_base) | BASE_ADDRESS_MODIFY; /* dynamic */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* indirect */ @@ -353,19 +353,21 @@ static void gen7_emit_pipeline_flush(struct batch_chunk *batch) static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 8); + u32 *cs = batch_alloc_items(batch, 0, 10); /* ivb: Stall before STATE_CACHE_INVALIDATE */ - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + *cs++ = 0; - *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = GFX_OP_PIPE_CONTROL(5); *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; *cs++ = 0; + *cs++ = 0; batch_advance(batch, cs); } @@ -391,12 +393,14 @@ static void emit_batch(struct i915_vma * const vma, desc_count); /* Reset inherited context registers */ + gen7_emit_pipeline_flush(&cmds); gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, MI_LOAD_REGISTER_IMM(2)); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_0_GEN7)); batch_add(&cmds, 0xffff0000); batch_add(&cmds, i915_mmio_reg_offset(CACHE_MODE_1)); batch_add(&cmds, 0xffff0000 | PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + gen7_emit_pipeline_invalidate(&cmds); gen7_emit_pipeline_flush(&cmds); /* Switch to the media pipeline and our base address */ diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0b31670343f5..346aa2057bad 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -709,9 +709,12 @@ static int engine_setup_common(struct intel_engine_cs *engine) goto err_status; } + err = intel_engine_init_cmd_parser(engine); + if (err) + goto err_cmd_parser; + intel_engine_init_active(engine, ENGINE_PHYSICAL); intel_engine_init_execlists(engine); - intel_engine_init_cmd_parser(engine); intel_engine_init__pm(engine); intel_engine_init_retire(engine); @@ -725,6 +728,8 @@ static int engine_setup_common(struct intel_engine_cs *engine) return 0; +err_cmd_parser: + intel_breadcrumbs_free(engine->breadcrumbs); err_status: cleanup_status_page(engine); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 7fb36b12fe7a..6614f6736486 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -316,7 +316,18 @@ void i915_vma_revoke_fence(struct i915_vma *vma) WRITE_ONCE(fence->vma, NULL); vma->fence = NULL; - with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref) + /* + * Skip the write to HW if and only if the device is currently + * suspended. + * + * If the driver does not currently hold a wakeref (if_in_use == 0), + * the device may currently be runtime suspended, or it may be woken + * up before the suspend takes place. If the device is not suspended + * (powered down) and we skip clearing the fence register, the HW is + * left in an undefined state where we may end up with multiple + * registers overlapping. + */ + with_intel_runtime_pm_if_active(fence_to_uncore(fence)->rpm, wakeref) fence_write(fence); } diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index b0899b665e85..da1d6d58fc42 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -939,7 +939,7 @@ static void fini_hash_table(struct intel_engine_cs *engine) * struct intel_engine_cs based on whether the platform requires software * command parsing. */ -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine) { const struct drm_i915_cmd_table *cmd_tables; int cmd_table_count; @@ -947,7 +947,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) && engine->class == COPY_ENGINE_CLASS)) - return; + return 0; switch (engine->class) { case RENDER_CLASS: @@ -1012,19 +1012,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) break; default: MISSING_CASE(engine->class); - return; + goto out; } if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) { drm_err(&engine->i915->drm, "%s: command descriptions are not sorted\n", engine->name); - return; + goto out; } if (!validate_regs_sorted(engine)) { drm_err(&engine->i915->drm, "%s: registers are not sorted\n", engine->name); - return; + goto out; } ret = init_hash_table(engine, cmd_tables, cmd_table_count); @@ -1032,10 +1032,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine) drm_err(&engine->i915->drm, "%s: initialised failed!\n", engine->name); fini_hash_table(engine); - return; + goto out; } engine->flags |= I915_ENGINE_USING_CMD_PARSER; + +out: + if (intel_engine_requires_cmd_parser(engine) && + !intel_engine_using_cmd_parser(engine)) + return -EINVAL; + + return 0; } /** diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index c6964f82a1bb..bd5f76a28d68 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1947,7 +1947,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type); /* i915_cmd_parser.c */ int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); -void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); +int intel_engine_init_cmd_parser(struct intel_engine_cs *engine); void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); int intel_engine_cmd_parser(struct intel_engine_cs *engine, struct i915_vma *batch, diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 649c26518d26..8d9ab4a91544 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -600,7 +600,6 @@ static int append_oa_sample(struct i915_perf_stream *stream, { int report_size = stream->oa_buffer.format_size; struct drm_i915_perf_record_header header; - u32 sample_flags = stream->sample_flags; header.type = DRM_I915_PERF_RECORD_SAMPLE; header.pad = 0; @@ -614,10 +613,8 @@ static int append_oa_sample(struct i915_perf_stream *stream, return -EFAULT; buf += sizeof(header); - if (sample_flags & SAMPLE_OA_REPORT) { - if (copy_to_user(buf, report, report_size)) - return -EFAULT; - } + if (copy_to_user(buf, report, report_size)) + return -EFAULT; (*offset) += header.size; @@ -2678,7 +2675,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream) stream->perf->ops.oa_enable(stream); - if (stream->periodic) + if (stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_start(&stream->poll_check_timer, ns_to_ktime(stream->poll_oa_period), HRTIMER_MODE_REL_PINNED); @@ -2741,7 +2738,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream) { stream->perf->ops.oa_disable(stream); - if (stream->periodic) + if (stream->sample_flags & SAMPLE_OA_REPORT) hrtimer_cancel(&stream->poll_check_timer); } @@ -3024,7 +3021,7 @@ static ssize_t i915_perf_read(struct file *file, * disabled stream as an error. In particular it might otherwise lead * to a deadlock for blocking file descriptors... */ - if (!stream->enabled) + if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT)) return -EIO; if (!(file->f_flags & O_NONBLOCK)) { diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 153ca9e65382..8b725efb2254 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -412,12 +412,20 @@ intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm) } /** - * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use + * __intel_runtime_pm_get_if_active - grab a runtime pm reference if device is active * @rpm: the intel_runtime_pm structure + * @ignore_usecount: get a ref even if dev->power.usage_count is 0 * * This function grabs a device-level runtime pm reference if the device is - * already in use and ensures that it is powered up. It is illegal to try - * and access the HW should intel_runtime_pm_get_if_in_use() report failure. + * already active and ensures that it is powered up. It is illegal to try + * and access the HW should intel_runtime_pm_get_if_active() report failure. + * + * If @ignore_usecount=true, a reference will be acquired even if there is no + * user requiring the device to be powered up (dev->power.usage_count == 0). + * If the function returns false in this case then it's guaranteed that the + * device's runtime suspend hook has been called already or that it will be + * called (and hence it's also guaranteed that the device's runtime resume + * hook will be called eventually). * * Any runtime pm reference obtained by this function must have a symmetric * call to intel_runtime_pm_put() to release the reference again. @@ -425,7 +433,8 @@ intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm) * Returns: the wakeref cookie to pass to intel_runtime_pm_put(), evaluates * as True if the wakeref was acquired, or False otherwise. */ -intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm) +static intel_wakeref_t __intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm, + bool ignore_usecount) { if (IS_ENABLED(CONFIG_PM)) { /* @@ -434,7 +443,7 @@ intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm) * function, since the power state is undefined. This applies * atm to the late/early system suspend/resume handlers. */ - if (pm_runtime_get_if_in_use(rpm->kdev) <= 0) + if (pm_runtime_get_if_active(rpm->kdev, ignore_usecount) <= 0) return 0; } @@ -443,6 +452,16 @@ intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm) return track_intel_runtime_pm_wakeref(rpm); } +intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm) +{ + return __intel_runtime_pm_get_if_active(rpm, false); +} + +intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm) +{ + return __intel_runtime_pm_get_if_active(rpm, true); +} + /** * intel_runtime_pm_get_noresume - grab a runtime pm reference * @rpm: the intel_runtime_pm structure diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.h b/drivers/gpu/drm/i915/intel_runtime_pm.h index ae64ff14c642..1e4ddd11c12b 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.h +++ b/drivers/gpu/drm/i915/intel_runtime_pm.h @@ -177,6 +177,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm); +intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm); intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm); @@ -188,6 +189,10 @@ intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm); for ((wf) = intel_runtime_pm_get_if_in_use(rpm); (wf); \ intel_runtime_pm_put((rpm), (wf)), (wf) = 0) +#define with_intel_runtime_pm_if_active(rpm, wf) \ + for ((wf) = intel_runtime_pm_get_if_active(rpm); (wf); \ + intel_runtime_pm_put((rpm), (wf)), (wf) = 0) + void intel_runtime_pm_put_unchecked(struct intel_runtime_pm *rpm); #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM) void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref); diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index 63b4c5643f9c..5cc20b403a25 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -201,7 +201,7 @@ static int lima_pm_busy(struct lima_device *ldev) int ret; /* resume GPU if it has been suspended by runtime PM */ - ret = pm_runtime_get_sync(ldev->dev); + ret = pm_runtime_resume_and_get(ldev->dev); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c index 74ef6fc0528b..523716e3c278 100644 --- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c +++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c @@ -267,7 +267,7 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx, } con = ovl_fmt_convert(ovl, fmt); - if (state->base.fb->format->has_alpha) + if (state->base.fb && state->base.fb->format->has_alpha) con |= OVL_CON_AEN | OVL_CON_ALPHA; if (pending->rotation & DRM_MODE_REFLECT_Y) { diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 42c5d3246cfc..453d8b4c5763 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -482,6 +482,16 @@ static int meson_probe_remote(struct platform_device *pdev, return count; } +static void meson_drv_shutdown(struct platform_device *pdev) +{ + struct meson_drm *priv = dev_get_drvdata(&pdev->dev); + struct drm_device *drm = priv->drm; + + DRM_DEBUG_DRIVER("\n"); + drm_kms_helper_poll_fini(drm); + drm_atomic_helper_shutdown(drm); +} + static int meson_drv_probe(struct platform_device *pdev) { struct component_match *match = NULL; @@ -553,6 +563,7 @@ static const struct dev_pm_ops meson_drv_pm_ops = { static struct platform_driver meson_drm_platform_driver = { .probe = meson_drv_probe, + .shutdown = meson_drv_shutdown, .driver = { .name = "meson-drm", .of_match_table = dt_match, diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index a5af223eaf50..81506d2539b0 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -626,8 +626,6 @@ static int a5xx_hw_init(struct msm_gpu *gpu) if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI) gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8)); - gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100); - /* Enable USE_RETENTION_FLOPS */ gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c index e6703ae98760..b3318f86aabc 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c @@ -264,6 +264,16 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) } name = "GPU_SET"; break; + case GMU_OOB_PERFCOUNTER_SET: + if (gmu->legacy) { + request = GMU_OOB_PERFCOUNTER_REQUEST; + ack = GMU_OOB_PERFCOUNTER_ACK; + } else { + request = GMU_OOB_PERFCOUNTER_REQUEST_NEW; + ack = GMU_OOB_PERFCOUNTER_ACK_NEW; + } + name = "PERFCOUNTER"; + break; case GMU_OOB_BOOT_SLUMBER: request = GMU_OOB_BOOT_SLUMBER_REQUEST; ack = GMU_OOB_BOOT_SLUMBER_ACK; @@ -301,9 +311,14 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) { if (!gmu->legacy) { - WARN_ON(state != GMU_OOB_GPU_SET); - gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, - 1 << GMU_OOB_GPU_SET_CLEAR_NEW); + if (state == GMU_OOB_GPU_SET) { + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_GPU_SET_CLEAR_NEW); + } else { + WARN_ON(state != GMU_OOB_PERFCOUNTER_SET); + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_PERFCOUNTER_CLEAR_NEW); + } return; } @@ -312,6 +327,10 @@ void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state) gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << GMU_OOB_GPU_SET_CLEAR); break; + case GMU_OOB_PERFCOUNTER_SET: + gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, + 1 << GMU_OOB_PERFCOUNTER_CLEAR); + break; case GMU_OOB_BOOT_SLUMBER: gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << GMU_OOB_BOOT_SLUMBER_CLEAR); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h index c6d2bced8e5d..9fa278de2106 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h @@ -156,6 +156,7 @@ enum a6xx_gmu_oob_state { GMU_OOB_BOOT_SLUMBER = 0, GMU_OOB_GPU_SET, GMU_OOB_DCVS_SET, + GMU_OOB_PERFCOUNTER_SET, }; /* These are the interrupt / ack bits for each OOB request that are set @@ -190,6 +191,13 @@ enum a6xx_gmu_oob_state { #define GMU_OOB_GPU_SET_ACK_NEW 31 #define GMU_OOB_GPU_SET_CLEAR_NEW 31 +#define GMU_OOB_PERFCOUNTER_REQUEST 17 +#define GMU_OOB_PERFCOUNTER_ACK 25 +#define GMU_OOB_PERFCOUNTER_CLEAR 25 + +#define GMU_OOB_PERFCOUNTER_REQUEST_NEW 28 +#define GMU_OOB_PERFCOUNTER_ACK_NEW 30 +#define GMU_OOB_PERFCOUNTER_CLEAR_NEW 30 void a6xx_hfi_init(struct a6xx_gmu *gmu); int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state); diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 130661898546..0366419d8bfe 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -1117,7 +1117,7 @@ static void a6xx_llc_slices_init(struct platform_device *pdev, a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU); a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW); - if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice)) + if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL); } @@ -1169,14 +1169,18 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); + static DEFINE_MUTEX(perfcounter_oob); + + mutex_lock(&perfcounter_oob); /* Force the GPU power on so we can read this register */ - a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); *value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO, REG_A6XX_RBBM_PERFCTR_CP_0_HI); - a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET); + a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET); + mutex_unlock(&perfcounter_oob); return 0; } diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index f09175698827..b35914de1b27 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -200,15 +200,15 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu, if (!iommu) return NULL; - if (adreno_is_a6xx(adreno_gpu)) { struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu); struct io_pgtable_domain_attr pgtbl_cfg; + /* - * This allows GPU to set the bus attributes required to use system - * cache on behalf of the iommu page table walker. - */ - if (!IS_ERR(a6xx_gpu->htw_llc_slice)) { + * This allows GPU to set the bus attributes required to use system + * cache on behalf of the iommu page table walker. + */ + if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice)) { pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA; iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg); } diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index 0c8f9f88301f..f5d71b274079 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -1180,7 +1180,7 @@ static void mdp5_crtc_pp_done_irq(struct mdp_irq *irq, uint32_t irqstatus) struct mdp5_crtc *mdp5_crtc = container_of(irq, struct mdp5_crtc, pp_done); - complete(&mdp5_crtc->pp_completion); + complete_all(&mdp5_crtc->pp_completion); } static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc) diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index e3462f5d96d7..6cbe10af0a7a 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -631,7 +631,7 @@ static void _dp_ctrl_calc_tu(struct dp_tu_calc_input *in, tu = kzalloc(sizeof(*tu), GFP_KERNEL); if (!tu) - return + return; dp_panel_update_tu_timings(in, tu); diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c index 3bc7ed21de28..81f6794a2510 100644 --- a/drivers/gpu/drm/msm/dp/dp_display.c +++ b/drivers/gpu/drm/msm/dp/dp_display.c @@ -651,8 +651,8 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data) dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND); /* signal the disconnect event early to ensure proper teardown */ - dp_display_handle_plugged_change(g_dp_display, false); reinit_completion(&dp->audio_comp); + dp_display_handle_plugged_change(g_dp_display, false); dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK | DP_DP_IRQ_HPD_INT_MASK, true); @@ -890,6 +890,9 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data) /* wait only if audio was enabled */ if (dp_display->audio_enabled) { + /* signal the disconnect event */ + reinit_completion(&dp->audio_comp); + dp_display_handle_plugged_change(dp_display, false); if (!wait_for_completion_timeout(&dp->audio_comp, HZ * 5)) DRM_ERROR("audio comp timeout\n"); diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c index 1afb7c579dbb..eca86bf448f7 100644 --- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c +++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c @@ -139,7 +139,7 @@ const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs = { .disable = dsi_20nm_phy_disable, .init = msm_dsi_phy_init_common, }, - .io_start = { 0xfd998300, 0xfd9a0300 }, + .io_start = { 0xfd998500, 0xfd9a0500 }, .num_dsi_phy = 2, }; diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c index a45fe95aff49..3dc65877fa10 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c @@ -163,7 +163,7 @@ struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev, break; case MSM_DSI_PHY_7NM: case MSM_DSI_PHY_7NM_V4_1: - pll = msm_dsi_pll_7nm_init(pdev, id); + pll = msm_dsi_pll_7nm_init(pdev, type, id); break; default: pll = ERR_PTR(-ENXIO); diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h index 3405982a092c..bbecb1de5678 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h @@ -117,10 +117,12 @@ msm_dsi_pll_10nm_init(struct platform_device *pdev, int id) } #endif #ifdef CONFIG_DRM_MSM_DSI_7NM_PHY -struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id); +struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, + enum msm_dsi_phy_type type, int id); #else static inline struct msm_dsi_pll * -msm_dsi_pll_7nm_init(struct platform_device *pdev, int id) +msm_dsi_pll_7nm_init(struct platform_device *pdev, + enum msm_dsi_phy_type type, int id) { return ERR_PTR(-ENODEV); } diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c index 93bf142e4a4e..c1f6708367ae 100644 --- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c +++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c @@ -852,7 +852,8 @@ static int pll_7nm_register(struct dsi_pll_7nm *pll_7nm) return ret; } -struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id) +struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, + enum msm_dsi_phy_type type, int id) { struct dsi_pll_7nm *pll_7nm; struct msm_dsi_pll *pll; @@ -885,7 +886,7 @@ struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id) pll = &pll_7nm->base; pll->min_rate = 1000000000UL; pll->max_rate = 3500000000UL; - if (pll->type == MSM_DSI_PHY_7NM_V4_1) { + if (type == MSM_DSI_PHY_7NM_V4_1) { pll->min_rate = 600000000UL; pll->max_rate = (unsigned long)5000000000ULL; /* workaround for max rate overflowing on 32-bit builds: */ diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 108c405e03dd..a5c6b8c23336 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -788,9 +788,10 @@ static int msm_ioctl_gem_info_iova(struct drm_device *dev, struct drm_file *file, struct drm_gem_object *obj, uint64_t *iova) { + struct msm_drm_private *priv = dev->dev_private; struct msm_file_private *ctx = file->driver_priv; - if (!ctx->aspace) + if (!priv->gpu) return -EINVAL; /* @@ -1071,6 +1072,10 @@ static int __maybe_unused msm_pm_resume(struct device *dev) static int __maybe_unused msm_pm_prepare(struct device *dev) { struct drm_device *ddev = dev_get_drvdata(dev); + struct msm_drm_private *priv = ddev ? ddev->dev_private : NULL; + + if (!priv || !priv->kms) + return 0; return drm_mode_config_helper_suspend(ddev); } @@ -1078,6 +1083,10 @@ static int __maybe_unused msm_pm_prepare(struct device *dev) static void __maybe_unused msm_pm_complete(struct device *dev) { struct drm_device *ddev = dev_get_drvdata(dev); + struct msm_drm_private *priv = ddev ? ddev->dev_private : NULL; + + if (!priv || !priv->kms) + return; drm_mode_config_helper_resume(ddev); } @@ -1310,6 +1319,10 @@ static int msm_pdev_remove(struct platform_device *pdev) static void msm_pdev_shutdown(struct platform_device *pdev) { struct drm_device *drm = platform_get_drvdata(pdev); + struct msm_drm_private *priv = drm ? drm->dev_private : NULL; + + if (!priv || !priv->kms) + return; drm_atomic_helper_shutdown(drm); } diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index d04c349d8112..5480852bdeda 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -198,6 +198,8 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit, submit->cmd[i].idx = submit_cmd.submit_idx; submit->cmd[i].nr_relocs = submit_cmd.nr_relocs; + userptr = u64_to_user_ptr(submit_cmd.relocs); + sz = array_size(submit_cmd.nr_relocs, sizeof(struct drm_msm_gem_submit_reloc)); /* check for overflow: */ diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h index d8151a89e163..4735251a394d 100644 --- a/drivers/gpu/drm/msm/msm_kms.h +++ b/drivers/gpu/drm/msm/msm_kms.h @@ -157,6 +157,7 @@ struct msm_kms { * from the crtc's pending_timer close to end of the frame: */ struct mutex commit_lock[MAX_CRTCS]; + struct lock_class_key commit_lock_keys[MAX_CRTCS]; unsigned pending_crtc_mask; struct msm_pending_timer pending_timers[MAX_CRTCS]; }; @@ -166,8 +167,11 @@ static inline int msm_kms_init(struct msm_kms *kms, { unsigned i, ret; - for (i = 0; i < ARRAY_SIZE(kms->commit_lock); i++) - mutex_init(&kms->commit_lock[i]); + for (i = 0; i < ARRAY_SIZE(kms->commit_lock); i++) { + lockdep_register_key(&kms->commit_lock_keys[i]); + __mutex_init(&kms->commit_lock[i], "&kms->commit_lock[i]", + &kms->commit_lock_keys[i]); + } kms->funcs = funcs; diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c index 5f4f09a601d4..f601e91241ac 100644 --- a/drivers/gpu/drm/nouveau/dispnv50/disp.c +++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c @@ -2663,9 +2663,20 @@ nv50_display_create(struct drm_device *dev) else nouveau_display(dev)->format_modifiers = disp50xx_modifiers; - if (disp->disp->object.oclass >= GK104_DISP) { + /* FIXME: 256x256 cursors are supported on Kepler, however unlike Maxwell and later + * generations Kepler requires that we use small pages (4K) for cursor scanout surfaces. The + * proper fix for this is to teach nouveau to migrate fbs being used for the cursor plane to + * small page allocations in prepare_fb(). When this is implemented, we should also force + * large pages (128K) for ovly fbs in order to fix Kepler ovlys. + * But until then, just limit cursors to 128x128 - which is small enough to avoid ever using + * large pages. + */ + if (disp->disp->object.oclass >= GM107_DISP) { dev->mode_config.cursor_width = 256; dev->mode_config.cursor_height = 256; + } else if (disp->disp->object.oclass >= GK104_DISP) { + dev->mode_config.cursor_width = 128; + dev->mode_config.cursor_height = 128; } else { dev->mode_config.cursor_width = 64; dev->mode_config.cursor_height = 64; diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h index f5f59261ea81..d1beaad0c82b 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h @@ -14,6 +14,7 @@ enum dcb_connector_type { DCB_CONNECTOR_LVDS_SPWG = 0x41, DCB_CONNECTOR_DP = 0x46, DCB_CONNECTOR_eDP = 0x47, + DCB_CONNECTOR_mDP = 0x48, DCB_CONNECTOR_HDMI_0 = 0x60, DCB_CONNECTOR_HDMI_1 = 0x61, DCB_CONNECTOR_HDMI_C = 0x63, diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 7ea367a5444d..f1c9a22083be 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -556,7 +556,8 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; ++i) { + i = 0; + while (i < ttm_dma->num_pages) { struct page *p = ttm_dma->pages[i]; size_t num_pages = 1; @@ -587,7 +588,8 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo) if (nvbo->force_coherent) return; - for (i = 0; i < ttm_dma->num_pages; ++i) { + i = 0; + while (i < ttm_dma->num_pages) { struct page *p = ttm_dma->pages[i]; size_t num_pages = 1; diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 5d191e58edf1..e48f1f7eb370 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -533,6 +533,7 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device, if (ret) { NV_PRINTK(err, cli, "channel failed to initialise, %d\n", ret); nouveau_channel_del(pchan); + goto done; } ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst); diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 8b4b3688c7ae..4c992fd5bd68 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -1210,6 +1210,7 @@ drm_conntype_from_dcb(enum dcb_connector_type dcb) case DCB_CONNECTOR_DMS59_DP0: case DCB_CONNECTOR_DMS59_DP1: case DCB_CONNECTOR_DP : + case DCB_CONNECTOR_mDP : case DCB_CONNECTOR_USB_C : return DRM_MODE_CONNECTOR_DisplayPort; case DCB_CONNECTOR_eDP : return DRM_MODE_CONNECTOR_eDP; case DCB_CONNECTOR_HDMI_0 : diff --git a/drivers/gpu/drm/panel/panel-elida-kd35t133.c b/drivers/gpu/drm/panel/panel-elida-kd35t133.c index bc36aa3c1123..fe5ac3ef9018 100644 --- a/drivers/gpu/drm/panel/panel-elida-kd35t133.c +++ b/drivers/gpu/drm/panel/panel-elida-kd35t133.c @@ -265,7 +265,8 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi) dsi->lanes = 1; dsi->format = MIPI_DSI_FMT_RGB888; dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST | - MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET; + MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET | + MIPI_DSI_CLOCK_NON_CONTINUOUS; drm_panel_init(&ctx->panel, &dsi->dev, &kd35t133_funcs, DRM_MODE_CONNECTOR_DSI); diff --git a/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c b/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c index 0c5f22e95c2d..624d17b96a69 100644 --- a/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c +++ b/drivers/gpu/drm/panel/panel-mantix-mlaf057we51.c @@ -22,6 +22,7 @@ /* Manufacturer specific Commands send via DSI */ #define MANTIX_CMD_OTP_STOP_RELOAD_MIPI 0x41 #define MANTIX_CMD_INT_CANCEL 0x4C +#define MANTIX_CMD_SPI_FINISH 0x90 struct mantix { struct device *dev; @@ -66,6 +67,10 @@ static int mantix_init_sequence(struct mantix *ctx) dsi_generic_write_seq(dsi, 0x80, 0x64, 0x00, 0x64, 0x00, 0x00); msleep(20); + dsi_generic_write_seq(dsi, MANTIX_CMD_SPI_FINISH, 0xA5); + dsi_generic_write_seq(dsi, MANTIX_CMD_OTP_STOP_RELOAD_MIPI, 0x00, 0x2F); + msleep(20); + dev_dbg(dev, "Panel init sequence done\n"); return 0; } diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c index 6b4e97bfd46e..603c5dfe8768 100644 --- a/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c +++ b/drivers/gpu/drm/panel/panel-samsung-s6e63m0.c @@ -25,6 +25,14 @@ /* Manufacturer Command Set */ #define MCS_ELVSS_ON 0xb1 #define MCS_TEMP_SWIRE 0xb2 +#define MCS_PENTILE_1 0xb3 +#define MCS_PENTILE_2 0xb4 +#define MCS_GAMMA_DELTA_Y_RED 0xb5 +#define MCS_GAMMA_DELTA_X_RED 0xb6 +#define MCS_GAMMA_DELTA_Y_GREEN 0xb7 +#define MCS_GAMMA_DELTA_X_GREEN 0xb8 +#define MCS_GAMMA_DELTA_Y_BLUE 0xb9 +#define MCS_GAMMA_DELTA_X_BLUE 0xba #define MCS_MIECTL1 0xc0 #define MCS_BCMODE 0xc1 #define MCS_ERROR_CHECK 0xd5 @@ -281,6 +289,7 @@ struct s6e63m0 { struct backlight_device *bl_dev; u8 lcd_type; u8 elvss_pulse; + bool dsi_mode; struct regulator_bulk_data supplies[2]; struct gpio_desc *reset_gpio; @@ -395,9 +404,21 @@ static int s6e63m0_check_lcd_type(struct s6e63m0 *ctx) static void s6e63m0_init(struct s6e63m0 *ctx) { - s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL, - 0x01, 0x27, 0x27, 0x07, 0x07, 0x54, 0x9f, - 0x63, 0x8f, 0x1a, 0x33, 0x0d, 0x00, 0x00); + /* + * We do not know why there is a difference in the DSI mode. + * (No datasheet.) + * + * In the vendor driver this sequence is called + * "SEQ_PANEL_CONDITION_SET" or "DCS_CMD_SEQ_PANEL_COND_SET". + */ + if (ctx->dsi_mode) + s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL, + 0x01, 0x2c, 0x2c, 0x07, 0x07, 0x5f, 0xb3, + 0x6d, 0x97, 0x1d, 0x3a, 0x0f, 0x00, 0x00); + else + s6e63m0_dcs_write_seq_static(ctx, MCS_PANELCTL, + 0x01, 0x27, 0x27, 0x07, 0x07, 0x54, 0x9f, + 0x63, 0x8f, 0x1a, 0x33, 0x0d, 0x00, 0x00); s6e63m0_dcs_write_seq_static(ctx, MCS_DISCTL, 0x02, 0x03, 0x1c, 0x10, 0x10); @@ -414,40 +435,40 @@ static void s6e63m0_init(struct s6e63m0 *ctx) s6e63m0_dcs_write_seq_static(ctx, MCS_SRCCTL, 0x00, 0x8e, 0x07); - s6e63m0_dcs_write_seq_static(ctx, 0xb3, 0x6c); + s6e63m0_dcs_write_seq_static(ctx, MCS_PENTILE_1, 0x6c); - s6e63m0_dcs_write_seq_static(ctx, 0xb5, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_Y_RED, 0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17, 0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b, 0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a, 0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23, 0x21, 0x20, 0x1e, 0x1e); - s6e63m0_dcs_write_seq_static(ctx, 0xb6, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_X_RED, 0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66); - s6e63m0_dcs_write_seq_static(ctx, 0xb7, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_Y_GREEN, 0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17, 0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b, 0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a, 0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23, 0x21, 0x20, 0x1e, 0x1e); - s6e63m0_dcs_write_seq_static(ctx, 0xb8, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_X_GREEN, 0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66); - s6e63m0_dcs_write_seq_static(ctx, 0xb9, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_Y_BLUE, 0x2c, 0x12, 0x0c, 0x0a, 0x10, 0x0e, 0x17, 0x13, 0x1f, 0x1a, 0x2a, 0x24, 0x1f, 0x1b, 0x1a, 0x17, 0x2b, 0x26, 0x22, 0x20, 0x3a, 0x34, 0x30, 0x2c, 0x29, 0x26, 0x25, 0x23, 0x21, 0x20, 0x1e, 0x1e); - s6e63m0_dcs_write_seq_static(ctx, 0xba, + s6e63m0_dcs_write_seq_static(ctx, MCS_GAMMA_DELTA_X_BLUE, 0x00, 0x00, 0x11, 0x22, 0x33, 0x44, 0x44, 0x44, 0x55, 0x55, 0x66, 0x66, 0x66, 0x66, 0x66, 0x66); @@ -671,12 +692,12 @@ static const struct backlight_ops s6e63m0_backlight_ops = { .update_status = s6e63m0_set_brightness, }; -static int s6e63m0_backlight_register(struct s6e63m0 *ctx) +static int s6e63m0_backlight_register(struct s6e63m0 *ctx, u32 max_brightness) { struct backlight_properties props = { .type = BACKLIGHT_RAW, - .brightness = MAX_BRIGHTNESS, - .max_brightness = MAX_BRIGHTNESS + .brightness = max_brightness, + .max_brightness = max_brightness, }; struct device *dev = ctx->dev; int ret = 0; @@ -698,12 +719,14 @@ int s6e63m0_probe(struct device *dev, bool dsi_mode) { struct s6e63m0 *ctx; + u32 max_brightness; int ret; ctx = devm_kzalloc(dev, sizeof(struct s6e63m0), GFP_KERNEL); if (!ctx) return -ENOMEM; + ctx->dsi_mode = dsi_mode; ctx->dcs_read = dcs_read; ctx->dcs_write = dcs_write; dev_set_drvdata(dev, ctx); @@ -712,6 +735,14 @@ int s6e63m0_probe(struct device *dev, ctx->enabled = false; ctx->prepared = false; + ret = device_property_read_u32(dev, "max-brightness", &max_brightness); + if (ret) + max_brightness = MAX_BRIGHTNESS; + if (max_brightness > MAX_BRIGHTNESS) { + dev_err(dev, "illegal max brightness specified\n"); + max_brightness = MAX_BRIGHTNESS; + } + ctx->supplies[0].supply = "vdd3"; ctx->supplies[1].supply = "vci"; ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies), @@ -731,7 +762,7 @@ int s6e63m0_probe(struct device *dev, dsi_mode ? DRM_MODE_CONNECTOR_DSI : DRM_MODE_CONNECTOR_DPI); - ret = s6e63m0_backlight_register(ctx); + ret = s6e63m0_backlight_register(ctx, max_brightness); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c index 012bce0cdb65..10738e04c09b 100644 --- a/drivers/gpu/drm/qxl/qxl_display.c +++ b/drivers/gpu/drm/qxl/qxl_display.c @@ -328,6 +328,7 @@ static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc, head.id = i; head.flags = 0; + head.surface_id = 0; oldcount = qdev->monitors_config->count; if (crtc->state->active) { struct drm_display_mode *mode = &crtc->mode; diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 5f3adba43e47..aa3b589f30a1 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -575,6 +575,8 @@ struct radeon_gem { struct list_head objects; }; +extern const struct drm_gem_object_funcs radeon_gem_object_funcs; + int radeon_gem_init(struct radeon_device *rdev); void radeon_gem_fini(struct radeon_device *rdev); int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size, diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index b6b21d2e7262..f17f621077de 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -43,7 +43,7 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj); int radeon_gem_prime_pin(struct drm_gem_object *obj); void radeon_gem_prime_unpin(struct drm_gem_object *obj); -static const struct drm_gem_object_funcs radeon_gem_object_funcs; +const struct drm_gem_object_funcs radeon_gem_object_funcs; static void radeon_gem_object_free(struct drm_gem_object *gobj) { @@ -227,7 +227,7 @@ static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r) return r; } -static const struct drm_gem_object_funcs radeon_gem_object_funcs = { +const struct drm_gem_object_funcs radeon_gem_object_funcs = { .free = radeon_gem_object_free, .open = radeon_gem_object_open, .close = radeon_gem_object_close, diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c index dd482edc819c..d0ff3ce68a4f 100644 --- a/drivers/gpu/drm/radeon/radeon_prime.c +++ b/drivers/gpu/drm/radeon/radeon_prime.c @@ -56,6 +56,8 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev, if (ret) return ERR_PTR(ret); + bo->tbo.base.funcs = &radeon_gem_object_funcs; + mutex_lock(&rdev->gem.mutex); list_add_tail(&bo->list, &rdev->gem.objects); mutex_unlock(&rdev->gem.mutex); diff --git a/drivers/gpu/drm/rcar-du/rcar_cmm.c b/drivers/gpu/drm/rcar-du/rcar_cmm.c index c578095b09a5..382d53f8a22e 100644 --- a/drivers/gpu/drm/rcar-du/rcar_cmm.c +++ b/drivers/gpu/drm/rcar-du/rcar_cmm.c @@ -122,7 +122,7 @@ int rcar_cmm_enable(struct platform_device *pdev) { int ret; - ret = pm_runtime_get_sync(&pdev->dev); + ret = pm_runtime_resume_and_get(&pdev->dev); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c index b5fb941e0f53..e23b9c7b4afe 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_crtc.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_crtc.c @@ -730,13 +730,10 @@ static void rcar_du_crtc_atomic_enable(struct drm_crtc *crtc, */ if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index) && rstate->outputs == BIT(RCAR_DU_OUTPUT_DPAD0)) { - struct rcar_du_encoder *encoder = - rcdu->encoders[RCAR_DU_OUTPUT_LVDS0 + rcrtc->index]; + struct drm_bridge *bridge = rcdu->lvds[rcrtc->index]; const struct drm_display_mode *mode = &crtc->state->adjusted_mode; - struct drm_bridge *bridge; - bridge = drm_bridge_chain_get_first_bridge(&encoder->base); rcar_lvds_clk_enable(bridge, mode->clock * 1000); } @@ -764,15 +761,12 @@ static void rcar_du_crtc_atomic_disable(struct drm_crtc *crtc, if (rcdu->info->lvds_clk_mask & BIT(rcrtc->index) && rstate->outputs == BIT(RCAR_DU_OUTPUT_DPAD0)) { - struct rcar_du_encoder *encoder = - rcdu->encoders[RCAR_DU_OUTPUT_LVDS0 + rcrtc->index]; - struct drm_bridge *bridge; + struct drm_bridge *bridge = rcdu->lvds[rcrtc->index]; /* * Disable the LVDS clock output, see * rcar_du_crtc_atomic_enable(). */ - bridge = drm_bridge_chain_get_first_bridge(&encoder->base); rcar_lvds_clk_disable(bridge); } diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.h b/drivers/gpu/drm/rcar-du/rcar_du_drv.h index 61504c54e2ec..3597a179bfb7 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_drv.h +++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.h @@ -20,10 +20,10 @@ struct clk; struct device; +struct drm_bridge; struct drm_device; struct drm_property; struct rcar_du_device; -struct rcar_du_encoder; #define RCAR_DU_FEATURE_CRTC_IRQ_CLOCK BIT(0) /* Per-CRTC IRQ and clock */ #define RCAR_DU_FEATURE_VSP1_SOURCE BIT(1) /* Has inputs from VSP1 */ @@ -71,6 +71,7 @@ struct rcar_du_device_info { #define RCAR_DU_MAX_CRTCS 4 #define RCAR_DU_MAX_GROUPS DIV_ROUND_UP(RCAR_DU_MAX_CRTCS, 2) #define RCAR_DU_MAX_VSPS 4 +#define RCAR_DU_MAX_LVDS 2 struct rcar_du_device { struct device *dev; @@ -83,11 +84,10 @@ struct rcar_du_device { struct rcar_du_crtc crtcs[RCAR_DU_MAX_CRTCS]; unsigned int num_crtcs; - struct rcar_du_encoder *encoders[RCAR_DU_OUTPUT_MAX]; - struct rcar_du_group groups[RCAR_DU_MAX_GROUPS]; struct platform_device *cmms[RCAR_DU_MAX_CRTCS]; struct rcar_du_vsp vsps[RCAR_DU_MAX_VSPS]; + struct drm_bridge *lvds[RCAR_DU_MAX_LVDS]; struct { struct drm_property *colorkey; diff --git a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c index b0335da0c161..50fc14534fa4 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_encoder.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_encoder.c @@ -57,7 +57,6 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, if (renc == NULL) return -ENOMEM; - rcdu->encoders[output] = renc; renc->output = output; encoder = rcar_encoder_to_drm_encoder(renc); @@ -91,6 +90,10 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu, ret = -EPROBE_DEFER; goto done; } + + if (output == RCAR_DU_OUTPUT_LVDS0 || + output == RCAR_DU_OUTPUT_LVDS1) + rcdu->lvds[output - RCAR_DU_OUTPUT_LVDS0] = bridge; } /* diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c index 72dda446355f..7015e22872bb 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c @@ -700,10 +700,10 @@ static int rcar_du_cmm_init(struct rcar_du_device *rcdu) int ret; cmm = of_parse_phandle(np, "renesas,cmms", i); - if (IS_ERR(cmm)) { + if (!cmm) { dev_err(rcdu->dev, "Failed to parse 'renesas,cmms' property\n"); - return PTR_ERR(cmm); + return -EINVAL; } if (!of_device_is_available(cmm)) { @@ -713,10 +713,10 @@ static int rcar_du_cmm_init(struct rcar_du_device *rcdu) } pdev = of_find_device_by_node(cmm); - if (IS_ERR(pdev)) { + if (!pdev) { dev_err(rcdu->dev, "No device found for CMM%u\n", i); of_node_put(cmm); - return PTR_ERR(pdev); + return -EINVAL; } of_node_put(cmm); diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h index 4a2099cb582e..857d97cdc67c 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h @@ -17,9 +17,20 @@ #define NUM_YUV2YUV_COEFFICIENTS 12 +/* AFBC supports a number of configurable modes. Relevant to us is block size + * (16x16 or 32x8), storage modifiers (SPARSE, SPLIT), and the YUV-like + * colourspace transform (YTR). 16x16 SPARSE mode is always used. SPLIT mode + * could be enabled via the hreg_block_split register, but is not currently + * handled. The colourspace transform is implicitly always assumed by the + * decoder, so consumers must use this transform as well. + * + * Failure to match modifiers will cause errors displaying AFBC buffers + * produced by conformant AFBC producers, including Mesa. + */ #define ROCKCHIP_AFBC_MOD \ DRM_FORMAT_MOD_ARM_AFBC( \ AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE \ + | AFBC_FORMAT_MOD_YTR \ ) enum vop_data_format { diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index b498d474ef9e..864e423d6d2b 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -891,6 +891,9 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched) if (sched->thread) kthread_stop(sched->thread); + /* Confirm no work left behind accessing device structures */ + cancel_delayed_work_sync(&sched->work_tdr); + sched->ready = false; } EXPORT_SYMBOL(drm_sched_fini); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c index 1e643bc7e786..9f06dec0fc61 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.c +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c @@ -569,30 +569,13 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon, if (info->bus_flags & DRM_BUS_FLAG_DE_LOW) val |= SUN4I_TCON0_IO_POL_DE_NEGATIVE; - /* - * On A20 and similar SoCs, the only way to achieve Positive Edge - * (Rising Edge), is setting dclk clock phase to 2/3(240°). - * By default TCON works in Negative Edge(Falling Edge), - * this is why phase is set to 0 in that case. - * Unfortunately there's no way to logically invert dclk through - * IO_POL register. - * The only acceptable way to work, triple checked with scope, - * is using clock phase set to 0° for Negative Edge and set to 240° - * for Positive Edge. - * On A33 and similar SoCs there would be a 90° phase option, - * but it divides also dclk by 2. - * Following code is a way to avoid quirks all around TCON - * and DOTCLOCK drivers. - */ - if (info->bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_POSEDGE) - clk_set_phase(tcon->dclk, 240); - if (info->bus_flags & DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE) - clk_set_phase(tcon->dclk, 0); + val |= SUN4I_TCON0_IO_POL_DCLK_DRIVE_NEGEDGE; regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG, SUN4I_TCON0_IO_POL_HSYNC_POSITIVE | SUN4I_TCON0_IO_POL_VSYNC_POSITIVE | + SUN4I_TCON0_IO_POL_DCLK_DRIVE_NEGEDGE | SUN4I_TCON0_IO_POL_DE_NEGATIVE, val); diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h index ee555318e3c2..e624f6977eb8 100644 --- a/drivers/gpu/drm/sun4i/sun4i_tcon.h +++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h @@ -113,6 +113,7 @@ #define SUN4I_TCON0_IO_POL_REG 0x88 #define SUN4I_TCON0_IO_POL_DCLK_PHASE(phase) ((phase & 3) << 28) #define SUN4I_TCON0_IO_POL_DE_NEGATIVE BIT(27) +#define SUN4I_TCON0_IO_POL_DCLK_DRIVE_NEGEDGE BIT(26) #define SUN4I_TCON0_IO_POL_HSYNC_POSITIVE BIT(25) #define SUN4I_TCON0_IO_POL_VSYNC_POSITIVE BIT(24) diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c index 85dd7131553a..0ae3a025efe9 100644 --- a/drivers/gpu/drm/tegra/dc.c +++ b/drivers/gpu/drm/tegra/dc.c @@ -2186,7 +2186,7 @@ static int tegra_dc_runtime_resume(struct host1x_client *client) struct device *dev = client->dev; int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get runtime PM: %d\n", err); return err; diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c index 5691ef1b0e58..f46d377f0c30 100644 --- a/drivers/gpu/drm/tegra/dsi.c +++ b/drivers/gpu/drm/tegra/dsi.c @@ -1111,7 +1111,7 @@ static int tegra_dsi_runtime_resume(struct host1x_client *client) struct device *dev = client->dev; int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get runtime PM: %d\n", err); return err; diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c index d09a24931c87..e5d2a4026028 100644 --- a/drivers/gpu/drm/tegra/hdmi.c +++ b/drivers/gpu/drm/tegra/hdmi.c @@ -1510,7 +1510,7 @@ static int tegra_hdmi_runtime_resume(struct host1x_client *client) struct device *dev = client->dev; int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get runtime PM: %d\n", err); return err; diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index 22a03f7ffdc1..5ce771cba133 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -789,7 +789,7 @@ static int tegra_display_hub_runtime_resume(struct host1x_client *client) unsigned int i; int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get runtime PM: %d\n", err); return err; diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c index cc2aa2308a51..f02a035dda45 100644 --- a/drivers/gpu/drm/tegra/sor.c +++ b/drivers/gpu/drm/tegra/sor.c @@ -3218,7 +3218,7 @@ static int tegra_sor_runtime_resume(struct host1x_client *client) struct device *dev = client->dev; int err; - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get runtime PM: %d\n", err); return err; diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index ade56b860cf9..b77f726303d8 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -314,7 +314,7 @@ static int vic_open_channel(struct tegra_drm_client *client, struct vic *vic = to_vic(client); int err; - err = pm_runtime_get_sync(vic->dev); + err = pm_runtime_resume_and_get(vic->dev); if (err < 0) return err; diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c index 33f65f4626e5..23866a54e3f9 100644 --- a/drivers/gpu/drm/tiny/gm12u320.c +++ b/drivers/gpu/drm/tiny/gm12u320.c @@ -83,6 +83,7 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)"); struct gm12u320_device { struct drm_device dev; + struct device *dmadev; struct drm_simple_display_pipe pipe; struct drm_connector conn; unsigned char *cmd_buf; @@ -601,6 +602,22 @@ static const uint64_t gm12u320_pipe_modifiers[] = { DRM_FORMAT_MOD_INVALID }; +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *gm12u320_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + + if (!gm12u320->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, gm12u320->dmadev); +} + DEFINE_DRM_GEM_FOPS(gm12u320_fops); static const struct drm_driver gm12u320_drm_driver = { @@ -614,6 +631,7 @@ static const struct drm_driver gm12u320_drm_driver = { .fops = &gm12u320_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = gm12u320_gem_prime_import, }; static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = { @@ -640,15 +658,18 @@ static int gm12u320_usb_probe(struct usb_interface *interface, struct gm12u320_device, dev); if (IS_ERR(gm12u320)) return PTR_ERR(gm12u320); + dev = &gm12u320->dev; + + gm12u320->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!gm12u320->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work); mutex_init(&gm12u320->fb_update.lock); - dev = &gm12u320->dev; - ret = drmm_mode_config_init(dev); if (ret) - return ret; + goto err_put_device; dev->mode_config.min_width = GM12U320_USER_WIDTH; dev->mode_config.max_width = GM12U320_USER_WIDTH; @@ -658,15 +679,15 @@ static int gm12u320_usb_probe(struct usb_interface *interface, ret = gm12u320_usb_alloc(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_set_ecomode(gm12u320); if (ret) - return ret; + goto err_put_device; ret = gm12u320_conn_init(gm12u320); if (ret) - return ret; + goto err_put_device; ret = drm_simple_display_pipe_init(&gm12u320->dev, &gm12u320->pipe, @@ -676,24 +697,31 @@ static int gm12u320_usb_probe(struct usb_interface *interface, gm12u320_pipe_modifiers, &gm12u320->conn); if (ret) - return ret; + goto err_put_device; drm_mode_config_reset(dev); usb_set_intfdata(interface, dev); ret = drm_dev_register(dev, 0); if (ret) - return ret; + goto err_put_device; drm_fbdev_generic_setup(dev, 0); return 0; + +err_put_device: + put_device(gm12u320->dmadev); + return ret; } static void gm12u320_usb_disconnect(struct usb_interface *interface) { struct drm_device *dev = usb_get_intfdata(interface); + struct gm12u320_device *gm12u320 = to_gm12u320(dev); + put_device(gm12u320->dmadev); + gm12u320->dmadev = NULL; drm_dev_unplug(dev); drm_atomic_helper_shutdown(dev); } diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 9a03c7834b1e..22073e77fdf9 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -967,8 +967,10 @@ static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo, return ret; /* move to the bounce domain */ ret = ttm_bo_handle_move_mem(bo, &hop_mem, false, ctx, NULL); - if (ret) + if (ret) { + ttm_resource_free(bo, &hop_mem); return ret; + } return 0; } @@ -1000,18 +1002,19 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo, * stop and the driver will be called to make * the second hop. */ -bounce: ret = ttm_bo_mem_space(bo, placement, &mem, ctx); if (ret) return ret; +bounce: ret = ttm_bo_handle_move_mem(bo, &mem, false, ctx, &hop); if (ret == -EMULTIHOP) { ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx, &hop); if (ret) - return ret; + goto out; /* try and move to final place now. */ goto bounce; } +out: if (ret) ttm_resource_free(bo, &mem); return ret; diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 6e27cb1bf48b..4eb6efb8b8c0 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -268,13 +268,13 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool, /* Remove a pool_type from the global shrinker list and free all pages */ static void ttm_pool_type_fini(struct ttm_pool_type *pt) { - struct page *p, *tmp; + struct page *p; mutex_lock(&shrinker_lock); list_del(&pt->shrinker_list); mutex_unlock(&shrinker_lock); - list_for_each_entry_safe(p, tmp, &pt->pages, lru) + while ((p = ttm_pool_type_take(pt))) ttm_pool_free_page(pt->pool, pt->caching, pt->order, p); } diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c index 9269092697d8..5703277c6f52 100644 --- a/drivers/gpu/drm/udl/udl_drv.c +++ b/drivers/gpu/drm/udl/udl_drv.c @@ -32,6 +32,22 @@ static int udl_usb_resume(struct usb_interface *interface) return drm_mode_config_helper_resume(dev); } +/* + * FIXME: Dma-buf sharing requires DMA support by the importing device. + * This function is a workaround to make USB devices work as well. + * See todo.rst for how to fix the issue in the dma-buf framework. + */ +static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf) +{ + struct udl_device *udl = to_udl(dev); + + if (!udl->dmadev) + return ERR_PTR(-ENODEV); + + return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev); +} + DEFINE_DRM_GEM_FOPS(udl_driver_fops); static const struct drm_driver driver = { @@ -40,6 +56,7 @@ static const struct drm_driver driver = { /* GEM hooks */ .fops = &udl_driver_fops, DRM_GEM_SHMEM_DRIVER_OPS, + .gem_prime_import = udl_driver_gem_prime_import, .name = DRIVER_NAME, .desc = DRIVER_DESC, diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h index 875e73551ae9..cc16a13316e4 100644 --- a/drivers/gpu/drm/udl/udl_drv.h +++ b/drivers/gpu/drm/udl/udl_drv.h @@ -50,6 +50,7 @@ struct urb_list { struct udl_device { struct drm_device drm; struct device *dev; + struct device *dmadev; struct drm_simple_display_pipe display_pipe; diff --git a/drivers/gpu/drm/udl/udl_main.c b/drivers/gpu/drm/udl/udl_main.c index 0e2a376cb075..853f147036f6 100644 --- a/drivers/gpu/drm/udl/udl_main.c +++ b/drivers/gpu/drm/udl/udl_main.c @@ -315,6 +315,10 @@ int udl_init(struct udl_device *udl) DRM_DEBUG("\n"); + udl->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev)); + if (!udl->dmadev) + drm_warn(dev, "buffer sharing not supported"); /* not an error */ + mutex_init(&udl->gem_lock); if (!udl_parse_vendor_descriptor(udl)) { @@ -343,12 +347,18 @@ int udl_init(struct udl_device *udl) err: if (udl->urbs.count) udl_free_urb_list(dev); + put_device(udl->dmadev); DRM_ERROR("%d\n", ret); return ret; } int udl_drop_usb(struct drm_device *dev) { + struct udl_device *udl = to_udl(dev); + udl_free_urb_list(dev); + put_device(udl->dmadev); + udl->dmadev = NULL; + return 0; } diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c index 98cab0bbe92d..a9f494590c57 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi.c +++ b/drivers/gpu/drm/vc4/vc4_hdmi.c @@ -119,24 +119,57 @@ static void vc5_hdmi_reset(struct vc4_hdmi *vc4_hdmi) HDMI_READ(HDMI_CLOCK_STOP) | VC4_DVP_HT_CLOCK_STOP_PIXEL); } +#ifdef CONFIG_DRM_VC4_HDMI_CEC +static void vc4_hdmi_cec_update_clk_div(struct vc4_hdmi *vc4_hdmi) +{ + u16 clk_cnt; + u32 value; + + value = HDMI_READ(HDMI_CEC_CNTRL_1); + value &= ~VC4_HDMI_CEC_DIV_CLK_CNT_MASK; + + /* + * Set the clock divider: the hsm_clock rate and this divider + * setting will give a 40 kHz CEC clock. + */ + clk_cnt = clk_get_rate(vc4_hdmi->hsm_clock) / CEC_CLOCK_FREQ; + value |= clk_cnt << VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT; + HDMI_WRITE(HDMI_CEC_CNTRL_1, value); +} +#else +static void vc4_hdmi_cec_update_clk_div(struct vc4_hdmi *vc4_hdmi) {} +#endif + static enum drm_connector_status vc4_hdmi_connector_detect(struct drm_connector *connector, bool force) { struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector); + bool connected = false; if (vc4_hdmi->hpd_gpio) { if (gpio_get_value_cansleep(vc4_hdmi->hpd_gpio) ^ vc4_hdmi->hpd_active_low) - return connector_status_connected; - cec_phys_addr_invalidate(vc4_hdmi->cec_adap); - return connector_status_disconnected; + connected = true; + } else if (drm_probe_ddc(vc4_hdmi->ddc)) { + connected = true; + } else if (HDMI_READ(HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED) { + connected = true; } - if (drm_probe_ddc(vc4_hdmi->ddc)) - return connector_status_connected; + if (connected) { + if (connector->status != connector_status_connected) { + struct edid *edid = drm_get_edid(connector, vc4_hdmi->ddc); + + if (edid) { + cec_s_phys_addr_from_edid(vc4_hdmi->cec_adap, edid); + vc4_hdmi->encoder.hdmi_monitor = drm_detect_hdmi_monitor(edid); + kfree(edid); + } + } - if (HDMI_READ(HDMI_HOTPLUG) & VC4_HDMI_HOTPLUG_CONNECTED) return connector_status_connected; + } + cec_phys_addr_invalidate(vc4_hdmi->cec_adap); return connector_status_disconnected; } @@ -639,6 +672,8 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) return; } + vc4_hdmi_cec_update_clk_div(vc4_hdmi); + /* * FIXME: When the pixel freq is 594MHz (4k60), this needs to be setup * at 300MHz. @@ -660,9 +695,6 @@ static void vc4_hdmi_encoder_pre_crtc_configure(struct drm_encoder *encoder) return; } - if (vc4_hdmi->variant->reset) - vc4_hdmi->variant->reset(vc4_hdmi); - if (vc4_hdmi->variant->phy_init) vc4_hdmi->variant->phy_init(vc4_hdmi, mode); @@ -790,6 +822,9 @@ static int vc4_hdmi_encoder_atomic_check(struct drm_encoder *encoder, pixel_rate = mode->clock * 1000; } + if (mode->flags & DRM_MODE_FLAG_DBLCLK) + pixel_rate = pixel_rate * 2; + if (pixel_rate > vc4_hdmi->variant->max_pixel_clock) return -EINVAL; @@ -1312,13 +1347,20 @@ static irqreturn_t vc4_cec_irq_handler_thread(int irq, void *priv) static void vc4_cec_read_msg(struct vc4_hdmi *vc4_hdmi, u32 cntrl1) { + struct drm_device *dev = vc4_hdmi->connector.dev; struct cec_msg *msg = &vc4_hdmi->cec_rx_msg; unsigned int i; msg->len = 1 + ((cntrl1 & VC4_HDMI_CEC_REC_WRD_CNT_MASK) >> VC4_HDMI_CEC_REC_WRD_CNT_SHIFT); + + if (msg->len > 16) { + drm_err(dev, "Attempting to read too much data (%d)\n", msg->len); + return; + } + for (i = 0; i < msg->len; i += 4) { - u32 val = HDMI_READ(HDMI_CEC_RX_DATA_1 + i); + u32 val = HDMI_READ(HDMI_CEC_RX_DATA_1 + (i >> 2)); msg->msg[i] = val & 0xff; msg->msg[i + 1] = (val >> 8) & 0xff; @@ -1411,11 +1453,17 @@ static int vc4_hdmi_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *msg) { struct vc4_hdmi *vc4_hdmi = cec_get_drvdata(adap); + struct drm_device *dev = vc4_hdmi->connector.dev; u32 val; unsigned int i; + if (msg->len > 16) { + drm_err(dev, "Attempting to transmit too much data (%d)\n", msg->len); + return -ENOMEM; + } + for (i = 0; i < msg->len; i += 4) - HDMI_WRITE(HDMI_CEC_TX_DATA_1 + i, + HDMI_WRITE(HDMI_CEC_TX_DATA_1 + (i >> 2), (msg->msg[i]) | (msg->msg[i + 1] << 8) | (msg->msg[i + 2] << 16) | @@ -1460,16 +1508,14 @@ static int vc4_hdmi_cec_init(struct vc4_hdmi *vc4_hdmi) cec_s_conn_info(vc4_hdmi->cec_adap, &conn_info); HDMI_WRITE(HDMI_CEC_CPU_MASK_SET, 0xffffffff); + value = HDMI_READ(HDMI_CEC_CNTRL_1); - value &= ~VC4_HDMI_CEC_DIV_CLK_CNT_MASK; - /* - * Set the logical address to Unregistered and set the clock - * divider: the hsm_clock rate and this divider setting will - * give a 40 kHz CEC clock. - */ - value |= VC4_HDMI_CEC_ADDR_MASK | - (4091 << VC4_HDMI_CEC_DIV_CLK_CNT_SHIFT); + /* Set the logical address to Unregistered */ + value |= VC4_HDMI_CEC_ADDR_MASK; HDMI_WRITE(HDMI_CEC_CNTRL_1, value); + + vc4_hdmi_cec_update_clk_div(vc4_hdmi); + ret = devm_request_threaded_irq(&pdev->dev, platform_get_irq(pdev, 0), vc4_cec_irq_handler, vc4_cec_irq_handler_thread, 0, @@ -1740,6 +1786,9 @@ static int vc4_hdmi_bind(struct device *dev, struct device *master, void *data) vc4_hdmi->disable_wifi_frequencies = of_property_read_bool(dev->of_node, "wifi-2.4ghz-coexistence"); + if (vc4_hdmi->variant->reset) + vc4_hdmi->variant->reset(vc4_hdmi); + pm_runtime_enable(dev); drm_simple_encoder_init(drm, encoder, DRM_MODE_ENCODER_TMDS); diff --git a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h index 96d764ebfe67..5379c36f0992 100644 --- a/drivers/gpu/drm/vc4/vc4_hdmi_regs.h +++ b/drivers/gpu/drm/vc4/vc4_hdmi_regs.h @@ -29,6 +29,7 @@ enum vc4_hdmi_field { HDMI_CEC_CPU_MASK_SET, HDMI_CEC_CPU_MASK_STATUS, HDMI_CEC_CPU_STATUS, + HDMI_CEC_CPU_SET, /* * Transmit data, first byte is low byte of the 32-bit reg. @@ -196,9 +197,10 @@ static const struct vc4_hdmi_register __maybe_unused vc4_hdmi_fields[] = { VC4_HDMI_REG(HDMI_TX_PHY_RESET_CTL, 0x02c0), VC4_HDMI_REG(HDMI_TX_PHY_CTL_0, 0x02c4), VC4_HDMI_REG(HDMI_CEC_CPU_STATUS, 0x0340), + VC4_HDMI_REG(HDMI_CEC_CPU_SET, 0x0344), VC4_HDMI_REG(HDMI_CEC_CPU_CLEAR, 0x0348), VC4_HDMI_REG(HDMI_CEC_CPU_MASK_STATUS, 0x034c), - VC4_HDMI_REG(HDMI_CEC_CPU_MASK_SET, 0x034c), + VC4_HDMI_REG(HDMI_CEC_CPU_MASK_SET, 0x0350), VC4_HDMI_REG(HDMI_CEC_CPU_MASK_CLEAR, 0x0354), VC4_HDMI_REG(HDMI_RAM_PACKET_START, 0x0400), }; diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c index c30c75ee83fc..8502400b2f9c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_gem.c +++ b/drivers/gpu/drm/virtio/virtgpu_gem.c @@ -39,9 +39,6 @@ static int virtio_gpu_gem_create(struct drm_file *file, int ret; u32 handle; - if (vgdev->has_virgl_3d) - virtio_gpu_create_context(dev, file); - ret = virtio_gpu_object_create(vgdev, params, &obj, NULL); if (ret < 0) return ret; @@ -119,6 +116,11 @@ int virtio_gpu_gem_object_open(struct drm_gem_object *obj, if (!vgdev->has_virgl_3d) goto out_notify; + /* the context might still be missing when the first ioctl is + * DRM_IOCTL_MODE_CREATE_DUMB or DRM_IOCTL_PRIME_FD_TO_HANDLE + */ + virtio_gpu_create_context(obj->dev, file); + objs = virtio_gpu_array_alloc(1); if (!objs) return -ENOMEM; diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c index b4ec479c32cd..b375394193be 100644 --- a/drivers/gpu/drm/virtio/virtgpu_kms.c +++ b/drivers/gpu/drm/virtio/virtgpu_kms.c @@ -163,6 +163,7 @@ int virtio_gpu_init(struct drm_device *dev) vgdev->host_visible_region.len, dev_name(&vgdev->vdev->dev))) { DRM_ERROR("Could not reserve host visible region\n"); + ret = -EBUSY; goto err_vqs; } diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c index 56172fe6995c..097cb1ee3126 100644 --- a/drivers/hid/hid-core.c +++ b/drivers/hid/hid-core.c @@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(hid_register_report); * Register a new field for this report. */ -static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values) +static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages) { struct hid_field *field; @@ -101,7 +101,7 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned field = kzalloc((sizeof(struct hid_field) + usages * sizeof(struct hid_usage) + - values * sizeof(unsigned)), GFP_KERNEL); + usages * sizeof(unsigned)), GFP_KERNEL); if (!field) return NULL; @@ -300,7 +300,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign usages = max_t(unsigned, parser->local.usage_index, parser->global.report_count); - field = hid_register_field(report, usages, parser->global.report_count); + field = hid_register_field(report, usages); if (!field) return 0; @@ -1307,6 +1307,9 @@ EXPORT_SYMBOL_GPL(hid_open_report); static s32 snto32(__u32 value, unsigned n) { + if (!value || !n) + return 0; + switch (n) { case 8: return ((__s8)value); case 16: return ((__s16)value); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 5ba0aa1d2335..b60279aaed43 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -641,6 +641,8 @@ #define USB_DEVICE_ID_INNEX_GENESIS_ATARI 0x4745 #define USB_VENDOR_ID_ITE 0x048d +#define I2C_VENDOR_ID_ITE 0x103c +#define I2C_DEVICE_ID_ITE_VOYO_WINPAD_A15 0x184f #define USB_DEVICE_ID_ITE_LENOVO_YOGA 0x8386 #define USB_DEVICE_ID_ITE_LENOVO_YOGA2 0x8350 #define I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720 0x837a diff --git a/drivers/hid/hid-ite.c b/drivers/hid/hid-ite.c index 22bfbebceaf4..14fc068affad 100644 --- a/drivers/hid/hid-ite.c +++ b/drivers/hid/hid-ite.c @@ -23,11 +23,16 @@ static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int hid_info(hdev, "Fixing up Acer Sw5-012 ITE keyboard report descriptor\n"); rdesc[163] = HID_MAIN_ITEM_RELATIVE; } - /* For Acer One S1002 keyboard-dock */ + /* For Acer One S1002/S1003 keyboard-dock */ if (*rsize == 188 && rdesc[185] == 0x81 && rdesc[186] == 0x02) { - hid_info(hdev, "Fixing up Acer S1002 ITE keyboard report descriptor\n"); + hid_info(hdev, "Fixing up Acer S1002/S1003 ITE keyboard report descriptor\n"); rdesc[186] = HID_MAIN_ITEM_RELATIVE; } + /* For Acer Aspire Switch 10E (SW3-016) keyboard-dock */ + if (*rsize == 210 && rdesc[184] == 0x81 && rdesc[185] == 0x02) { + hid_info(hdev, "Fixing up Acer Aspire Switch 10E (SW3-016) ITE keyboard report descriptor\n"); + rdesc[185] = HID_MAIN_ITEM_RELATIVE; + } } return rdesc; @@ -114,7 +119,8 @@ static const struct hid_device_id ite_devices[] = { /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */ { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC, USB_VENDOR_ID_SYNAPTICS, - USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003) }, + USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003), + .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT }, { } }; MODULE_DEVICE_TABLE(hid, ite_devices); diff --git a/drivers/hid/hid-logitech-dj.c b/drivers/hid/hid-logitech-dj.c index 45e7e0bdd382..271bd8d24339 100644 --- a/drivers/hid/hid-logitech-dj.c +++ b/drivers/hid/hid-logitech-dj.c @@ -980,6 +980,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev, case 0x07: device_type = "eQUAD step 4 Gaming"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); + workitem.reports_supported |= STD_KEYBOARD; break; case 0x08: device_type = "eQUAD step 4 for gamepads"; @@ -994,7 +995,12 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev, workitem.reports_supported |= STD_KEYBOARD; break; case 0x0d: - device_type = "eQUAD Lightspeed 1_1"; + device_type = "eQUAD Lightspeed 1.1"; + logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); + workitem.reports_supported |= STD_KEYBOARD; + break; + case 0x0f: + device_type = "eQUAD Lightspeed 1.2"; logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem); workitem.reports_supported |= STD_KEYBOARD; break; diff --git a/drivers/hid/hid-logitech-hidpp.c b/drivers/hid/hid-logitech-hidpp.c index 7eb9a6ddb46a..d459e2dbe647 100644 --- a/drivers/hid/hid-logitech-hidpp.c +++ b/drivers/hid/hid-logitech-hidpp.c @@ -92,6 +92,8 @@ MODULE_PARM_DESC(disable_tap_to_click, #define HIDPP_CAPABILITY_BATTERY_MILEAGE BIT(2) #define HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS BIT(3) #define HIDPP_CAPABILITY_BATTERY_VOLTAGE BIT(4) +#define HIDPP_CAPABILITY_BATTERY_PERCENTAGE BIT(5) +#define HIDPP_CAPABILITY_UNIFIED_BATTERY BIT(6) #define lg_map_key_clear(c) hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c)) @@ -152,6 +154,7 @@ struct hidpp_battery { int voltage; int charge_type; bool online; + u8 supported_levels_1004; }; /** @@ -1171,7 +1174,7 @@ static int hidpp20_batterylevel_get_battery_info(struct hidpp_device *hidpp, return 0; } -static int hidpp20_query_battery_info(struct hidpp_device *hidpp) +static int hidpp20_query_battery_info_1000(struct hidpp_device *hidpp) { u8 feature_type; int ret; @@ -1208,7 +1211,7 @@ static int hidpp20_query_battery_info(struct hidpp_device *hidpp) return 0; } -static int hidpp20_battery_event(struct hidpp_device *hidpp, +static int hidpp20_battery_event_1000(struct hidpp_device *hidpp, u8 *data, int size) { struct hidpp_report *report = (struct hidpp_report *)data; @@ -1380,6 +1383,224 @@ static int hidpp20_battery_voltage_event(struct hidpp_device *hidpp, return 0; } +/* -------------------------------------------------------------------------- */ +/* 0x1004: Unified battery */ +/* -------------------------------------------------------------------------- */ + +#define HIDPP_PAGE_UNIFIED_BATTERY 0x1004 + +#define CMD_UNIFIED_BATTERY_GET_CAPABILITIES 0x00 +#define CMD_UNIFIED_BATTERY_GET_STATUS 0x10 + +#define EVENT_UNIFIED_BATTERY_STATUS_EVENT 0x00 + +#define FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL BIT(0) +#define FLAG_UNIFIED_BATTERY_LEVEL_LOW BIT(1) +#define FLAG_UNIFIED_BATTERY_LEVEL_GOOD BIT(2) +#define FLAG_UNIFIED_BATTERY_LEVEL_FULL BIT(3) + +#define FLAG_UNIFIED_BATTERY_FLAGS_RECHARGEABLE BIT(0) +#define FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE BIT(1) + +static int hidpp20_unifiedbattery_get_capabilities(struct hidpp_device *hidpp, + u8 feature_index) +{ + struct hidpp_report response; + int ret; + u8 *params = (u8 *)response.fap.params; + + if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS || + hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) { + /* we have already set the device capabilities, so let's skip */ + return 0; + } + + ret = hidpp_send_fap_command_sync(hidpp, feature_index, + CMD_UNIFIED_BATTERY_GET_CAPABILITIES, + NULL, 0, &response); + /* Ignore these intermittent errors */ + if (ret == HIDPP_ERROR_RESOURCE_ERROR) + return -EIO; + if (ret > 0) { + hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", + __func__, ret); + return -EPROTO; + } + if (ret) + return ret; + + /* + * If the device supports state of charge (battery percentage) we won't + * export the battery level information. there are 4 possible battery + * levels and they all are optional, this means that the device might + * not support any of them, we are just better off with the battery + * percentage. + */ + if (params[1] & FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE) { + hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_PERCENTAGE; + hidpp->battery.supported_levels_1004 = 0; + } else { + hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS; + hidpp->battery.supported_levels_1004 = params[0]; + } + + return 0; +} + +static int hidpp20_unifiedbattery_map_status(struct hidpp_device *hidpp, + u8 charging_status, + u8 external_power_status) +{ + int status; + + switch (charging_status) { + case 0: /* discharging */ + status = POWER_SUPPLY_STATUS_DISCHARGING; + break; + case 1: /* charging */ + case 2: /* charging slow */ + status = POWER_SUPPLY_STATUS_CHARGING; + break; + case 3: /* complete */ + status = POWER_SUPPLY_STATUS_FULL; + break; + case 4: /* error */ + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + hid_info(hidpp->hid_dev, "%s: charging error", + hidpp->name); + break; + default: + status = POWER_SUPPLY_STATUS_NOT_CHARGING; + break; + } + + return status; +} + +static int hidpp20_unifiedbattery_map_level(struct hidpp_device *hidpp, + u8 battery_level) +{ + /* cler unsupported level bits */ + battery_level &= hidpp->battery.supported_levels_1004; + + if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_FULL) + return POWER_SUPPLY_CAPACITY_LEVEL_FULL; + else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_GOOD) + return POWER_SUPPLY_CAPACITY_LEVEL_NORMAL; + else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_LOW) + return POWER_SUPPLY_CAPACITY_LEVEL_LOW; + else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL) + return POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL; + + return POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN; +} + +static int hidpp20_unifiedbattery_get_status(struct hidpp_device *hidpp, + u8 feature_index, + u8 *state_of_charge, + int *status, + int *level) +{ + struct hidpp_report response; + int ret; + u8 *params = (u8 *)response.fap.params; + + ret = hidpp_send_fap_command_sync(hidpp, feature_index, + CMD_UNIFIED_BATTERY_GET_STATUS, + NULL, 0, &response); + /* Ignore these intermittent errors */ + if (ret == HIDPP_ERROR_RESOURCE_ERROR) + return -EIO; + if (ret > 0) { + hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n", + __func__, ret); + return -EPROTO; + } + if (ret) + return ret; + + *state_of_charge = params[0]; + *status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]); + *level = hidpp20_unifiedbattery_map_level(hidpp, params[1]); + + return 0; +} + +static int hidpp20_query_battery_info_1004(struct hidpp_device *hidpp) +{ + u8 feature_type; + int ret; + u8 state_of_charge; + int status, level; + + if (hidpp->battery.feature_index == 0xff) { + ret = hidpp_root_get_feature(hidpp, + HIDPP_PAGE_UNIFIED_BATTERY, + &hidpp->battery.feature_index, + &feature_type); + if (ret) + return ret; + } + + ret = hidpp20_unifiedbattery_get_capabilities(hidpp, + hidpp->battery.feature_index); + if (ret) + return ret; + + ret = hidpp20_unifiedbattery_get_status(hidpp, + hidpp->battery.feature_index, + &state_of_charge, + &status, + &level); + if (ret) + return ret; + + hidpp->capabilities |= HIDPP_CAPABILITY_UNIFIED_BATTERY; + hidpp->battery.capacity = state_of_charge; + hidpp->battery.status = status; + hidpp->battery.level = level; + hidpp->battery.online = true; + + return 0; +} + +static int hidpp20_battery_event_1004(struct hidpp_device *hidpp, + u8 *data, int size) +{ + struct hidpp_report *report = (struct hidpp_report *)data; + u8 *params = (u8 *)report->fap.params; + int state_of_charge, status, level; + bool changed; + + if (report->fap.feature_index != hidpp->battery.feature_index || + report->fap.funcindex_clientid != EVENT_UNIFIED_BATTERY_STATUS_EVENT) + return 0; + + state_of_charge = params[0]; + status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]); + level = hidpp20_unifiedbattery_map_level(hidpp, params[1]); + + changed = status != hidpp->battery.status || + (state_of_charge != hidpp->battery.capacity && + hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) || + (level != hidpp->battery.level && + hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS); + + if (changed) { + hidpp->battery.capacity = state_of_charge; + hidpp->battery.status = status; + hidpp->battery.level = level; + if (hidpp->battery.ps) + power_supply_changed(hidpp->battery.ps); + } + + return 0; +} + +/* -------------------------------------------------------------------------- */ +/* Battery feature helpers */ +/* -------------------------------------------------------------------------- */ + static enum power_supply_property hidpp_battery_props[] = { POWER_SUPPLY_PROP_ONLINE, POWER_SUPPLY_PROP_STATUS, @@ -3307,7 +3528,10 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data, } if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { - ret = hidpp20_battery_event(hidpp, data, size); + ret = hidpp20_battery_event_1000(hidpp, data, size); + if (ret != 0) + return ret; + ret = hidpp20_battery_event_1004(hidpp, data, size); if (ret != 0) return ret; ret = hidpp_solar_battery_event(hidpp, data, size); @@ -3443,9 +3667,14 @@ static int hidpp_initialize_battery(struct hidpp_device *hidpp) if (hidpp->quirks & HIDPP_QUIRK_CLASS_K750) ret = hidpp_solar_request_battery_event(hidpp); else { - ret = hidpp20_query_battery_voltage_info(hidpp); + /* we only support one battery feature right now, so let's + first check the ones that support battery level first + and leave voltage for last */ + ret = hidpp20_query_battery_info_1000(hidpp); + if (ret) + ret = hidpp20_query_battery_info_1004(hidpp); if (ret) - ret = hidpp20_query_battery_info(hidpp); + ret = hidpp20_query_battery_voltage_info(hidpp); } if (ret) @@ -3473,7 +3702,8 @@ static int hidpp_initialize_battery(struct hidpp_device *hidpp) num_battery_props = ARRAY_SIZE(hidpp_battery_props) - 3; - if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE) + if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE || + hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) battery_props[num_battery_props++] = POWER_SUPPLY_PROP_CAPACITY; @@ -3650,8 +3880,10 @@ static void hidpp_connect_event(struct hidpp_device *hidpp) } else if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) { if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE) hidpp20_query_battery_voltage_info(hidpp); + else if (hidpp->capabilities & HIDPP_CAPABILITY_UNIFIED_BATTERY) + hidpp20_query_battery_info_1004(hidpp); else - hidpp20_query_battery_info(hidpp); + hidpp20_query_battery_info_1000(hidpp); } if (hidpp->battery.ps) power_supply_changed(hidpp->battery.ps); diff --git a/drivers/hid/i2c-hid/i2c-hid-core.c b/drivers/hid/i2c-hid/i2c-hid-core.c index bfe716d7ea44..c586acf2fc0b 100644 --- a/drivers/hid/i2c-hid/i2c-hid-core.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -171,6 +171,8 @@ static const struct i2c_hid_quirks { I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV }, { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, + { I2C_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_VOYO_WINPAD_A15, + I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_3118, I2C_HID_QUIRK_NO_IRQ_AFTER_RESET }, { USB_VENDOR_ID_ELAN, HID_ANY_ID, diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index 1bd0eb71559c..44d715c12f6a 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -2600,7 +2600,12 @@ static void wacom_wac_finger_event(struct hid_device *hdev, wacom_wac->is_invalid_bt_frame = !value; return; case HID_DG_CONTACTMAX: - features->touch_max = value; + if (!features->touch_max) { + features->touch_max = value; + } else { + hid_warn(hdev, "%s: ignoring attempt to overwrite non-zero touch_max " + "%d -> %d\n", __func__, features->touch_max, value); + } return; } diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c index 7596dc164648..44a3f5660c10 100644 --- a/drivers/hsi/controllers/omap_ssi_core.c +++ b/drivers/hsi/controllers/omap_ssi_core.c @@ -424,7 +424,7 @@ static int ssi_hw_init(struct hsi_controller *ssi) struct omap_ssi_controller *omap_ssi = hsi_controller_drvdata(ssi); int err; - err = pm_runtime_get_sync(ssi->device.parent); + err = pm_runtime_resume_and_get(ssi->device.parent); if (err < 0) { dev_err(&ssi->device, "runtime PM failed %d\n", err); return err; diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1d44bb635bb8..6be9f56cb627 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -1102,8 +1102,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) vmbus_device_unregister(channel->device_obj); put_device(dev); } - } - if (channel->primary_channel != NULL) { + } else if (channel->primary_channel != NULL) { /* * Sub-channel is being rescinded. Following is the channel * close sequence when initiated from the driveri (refer to diff --git a/drivers/hwmon/dell-smm-hwmon.c b/drivers/hwmon/dell-smm-hwmon.c index ec448f5f2dc3..73b9db9e3aab 100644 --- a/drivers/hwmon/dell-smm-hwmon.c +++ b/drivers/hwmon/dell-smm-hwmon.c @@ -1159,6 +1159,13 @@ static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"), }, }, + { + .ident = "Dell XPS 15 L502X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Dell System XPS L502X"), + }, + }, { } }; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c index b20b6ff17cf6..578d4628d918 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c @@ -226,7 +226,8 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata) writel_relaxed(0x0, drvdata->base + TRCAUXCTLR); writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R); writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R); - writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR); + if (drvdata->stallctl) + writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR); writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR); writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR); writel_relaxed(config->ccctlr, drvdata->base + TRCCCCTLR); @@ -1288,7 +1289,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR); state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R); state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R); - state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR); + if (drvdata->stallctl) + state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR); state->trctsctlr = readl(drvdata->base + TRCTSCTLR); state->trcsyncpr = readl(drvdata->base + TRCSYNCPR); state->trcccctlr = readl(drvdata->base + TRCCCCTLR); @@ -1355,7 +1357,8 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR); - state->trcpdcr = readl(drvdata->base + TRCPDCR); + if (!drvdata->skip_power_up) + state->trcpdcr = readl(drvdata->base + TRCPDCR); /* wait for TRCSTATR.IDLE to go up */ if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) { @@ -1373,9 +1376,9 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata) * potentially save power on systems that respect the TRCPDCR_PU * despite requesting software to save/restore state. */ - writel_relaxed((state->trcpdcr & ~TRCPDCR_PU), - drvdata->base + TRCPDCR); - + if (!drvdata->skip_power_up) + writel_relaxed((state->trcpdcr & ~TRCPDCR_PU), + drvdata->base + TRCPDCR); out: CS_LOCK(drvdata->base); return ret; @@ -1397,7 +1400,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR); writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R); writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R); - writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR); + if (drvdata->stallctl) + writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR); writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR); writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR); writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR); @@ -1469,7 +1473,8 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata) writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET); - writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR); + if (!drvdata->skip_power_up) + writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR); drvdata->state_needs_restore = false; diff --git a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c index 989ce7b8ade7..4682f2613996 100644 --- a/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c +++ b/drivers/hwtracing/coresight/coresight-etm4x-sysfs.c @@ -389,7 +389,7 @@ static ssize_t mode_store(struct device *dev, config->eventctrl1 &= ~BIT(12); /* bit[8], Instruction stall bit */ - if (config->mode & ETM_MODE_ISTALL_EN) + if ((config->mode & ETM_MODE_ISTALL_EN) && (drvdata->stallctl == true)) config->stall_ctrl |= BIT(8); else config->stall_ctrl &= ~BIT(8); diff --git a/drivers/i2c/busses/i2c-bcm-iproc.c b/drivers/i2c/busses/i2c-bcm-iproc.c index d8295b1c379d..35baca2f62c4 100644 --- a/drivers/i2c/busses/i2c-bcm-iproc.c +++ b/drivers/i2c/busses/i2c-bcm-iproc.c @@ -159,6 +159,11 @@ #define IE_S_ALL_INTERRUPT_SHIFT 21 #define IE_S_ALL_INTERRUPT_MASK 0x3f +/* + * It takes ~18us to reading 10bytes of data, hence to keep tasklet + * running for less time, max slave read per tasklet is set to 10 bytes. + */ +#define MAX_SLAVE_RX_PER_INT 10 enum i2c_slave_read_status { I2C_SLAVE_RX_FIFO_EMPTY = 0, @@ -205,8 +210,18 @@ struct bcm_iproc_i2c_dev { /* bytes that have been read */ unsigned int rx_bytes; unsigned int thld_bytes; + + bool slave_rx_only; + bool rx_start_rcvd; + bool slave_read_complete; + u32 tx_underrun; + u32 slave_int_mask; + struct tasklet_struct slave_rx_tasklet; }; +/* tasklet to process slave rx data */ +static void slave_rx_tasklet_fn(unsigned long); + /* * Can be expanded in the future if more interrupt status bits are utilized */ @@ -215,7 +230,8 @@ struct bcm_iproc_i2c_dev { #define ISR_MASK_SLAVE (BIT(IS_S_START_BUSY_SHIFT)\ | BIT(IS_S_RX_EVENT_SHIFT) | BIT(IS_S_RD_EVENT_SHIFT)\ - | BIT(IS_S_TX_UNDERRUN_SHIFT)) + | BIT(IS_S_TX_UNDERRUN_SHIFT) | BIT(IS_S_RX_FIFO_FULL_SHIFT)\ + | BIT(IS_S_RX_THLD_SHIFT)) static int bcm_iproc_i2c_reg_slave(struct i2c_client *slave); static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave); @@ -259,6 +275,7 @@ static void bcm_iproc_i2c_slave_init( { u32 val; + iproc_i2c->tx_underrun = 0; if (need_reset) { /* put controller in reset */ val = iproc_i2c_rd_reg(iproc_i2c, CFG_OFFSET); @@ -295,8 +312,11 @@ static void bcm_iproc_i2c_slave_init( /* Enable interrupt register to indicate a valid byte in receive fifo */ val = BIT(IE_S_RX_EVENT_SHIFT); + /* Enable interrupt register to indicate a Master read transaction */ + val |= BIT(IE_S_RD_EVENT_SHIFT); /* Enable interrupt register for the Slave BUSY command */ val |= BIT(IE_S_START_BUSY_SHIFT); + iproc_i2c->slave_int_mask = val; iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); } @@ -321,76 +341,176 @@ static void bcm_iproc_i2c_check_slave_status( } } -static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, - u32 status) +static void bcm_iproc_i2c_slave_read(struct bcm_iproc_i2c_dev *iproc_i2c) { + u8 rx_data, rx_status; + u32 rx_bytes = 0; u32 val; - u8 value, rx_status; - /* Slave RX byte receive */ - if (status & BIT(IS_S_RX_EVENT_SHIFT)) { + while (rx_bytes < MAX_SLAVE_RX_PER_INT) { val = iproc_i2c_rd_reg(iproc_i2c, S_RX_OFFSET); rx_status = (val >> S_RX_STATUS_SHIFT) & S_RX_STATUS_MASK; + rx_data = ((val >> S_RX_DATA_SHIFT) & S_RX_DATA_MASK); + if (rx_status == I2C_SLAVE_RX_START) { - /* Start of SMBUS for Master write */ + /* Start of SMBUS Master write */ i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_WRITE_REQUESTED, &value); - - val = iproc_i2c_rd_reg(iproc_i2c, S_RX_OFFSET); - value = (u8)((val >> S_RX_DATA_SHIFT) & S_RX_DATA_MASK); + I2C_SLAVE_WRITE_REQUESTED, &rx_data); + iproc_i2c->rx_start_rcvd = true; + iproc_i2c->slave_read_complete = false; + } else if (rx_status == I2C_SLAVE_RX_DATA && + iproc_i2c->rx_start_rcvd) { + /* Middle of SMBUS Master write */ i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_WRITE_RECEIVED, &value); - } else if (status & BIT(IS_S_RD_EVENT_SHIFT)) { - /* Start of SMBUS for Master Read */ - i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_READ_REQUESTED, &value); - iproc_i2c_wr_reg(iproc_i2c, S_TX_OFFSET, value); + I2C_SLAVE_WRITE_RECEIVED, &rx_data); + } else if (rx_status == I2C_SLAVE_RX_END && + iproc_i2c->rx_start_rcvd) { + /* End of SMBUS Master write */ + if (iproc_i2c->slave_rx_only) + i2c_slave_event(iproc_i2c->slave, + I2C_SLAVE_WRITE_RECEIVED, + &rx_data); + + i2c_slave_event(iproc_i2c->slave, I2C_SLAVE_STOP, + &rx_data); + } else if (rx_status == I2C_SLAVE_RX_FIFO_EMPTY) { + iproc_i2c->rx_start_rcvd = false; + iproc_i2c->slave_read_complete = true; + break; + } - val = BIT(S_CMD_START_BUSY_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, S_CMD_OFFSET, val); + rx_bytes++; + } +} - /* - * Enable interrupt for TX FIFO becomes empty and - * less than PKT_LENGTH bytes were output on the SMBUS - */ - val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); - val |= BIT(IE_S_TX_UNDERRUN_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); - } else { - /* Master write other than start */ - value = (u8)((val >> S_RX_DATA_SHIFT) & S_RX_DATA_MASK); +static void slave_rx_tasklet_fn(unsigned long data) +{ + struct bcm_iproc_i2c_dev *iproc_i2c = (struct bcm_iproc_i2c_dev *)data; + u32 int_clr; + + bcm_iproc_i2c_slave_read(iproc_i2c); + + /* clear pending IS_S_RX_EVENT_SHIFT interrupt */ + int_clr = BIT(IS_S_RX_EVENT_SHIFT); + + if (!iproc_i2c->slave_rx_only && iproc_i2c->slave_read_complete) { + /* + * In case of single byte master-read request, + * IS_S_TX_UNDERRUN_SHIFT event is generated before + * IS_S_START_BUSY_SHIFT event. Hence start slave data send + * from first IS_S_TX_UNDERRUN_SHIFT event. + * + * This means don't send any data from slave when + * IS_S_RD_EVENT_SHIFT event is generated else it will increment + * eeprom or other backend slave driver read pointer twice. + */ + iproc_i2c->tx_underrun = 0; + iproc_i2c->slave_int_mask |= BIT(IE_S_TX_UNDERRUN_SHIFT); + + /* clear IS_S_RD_EVENT_SHIFT interrupt */ + int_clr |= BIT(IS_S_RD_EVENT_SHIFT); + } + + /* clear slave interrupt */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, int_clr); + /* enable slave interrupts */ + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, iproc_i2c->slave_int_mask); +} + +static bool bcm_iproc_i2c_slave_isr(struct bcm_iproc_i2c_dev *iproc_i2c, + u32 status) +{ + u32 val; + u8 value; + + /* + * Slave events in case of master-write, master-write-read and, + * master-read + * + * Master-write : only IS_S_RX_EVENT_SHIFT event + * Master-write-read: both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events + * Master-read : both IS_S_RX_EVENT_SHIFT and IS_S_RD_EVENT_SHIFT + * events or only IS_S_RD_EVENT_SHIFT + */ + if (status & BIT(IS_S_RX_EVENT_SHIFT) || + status & BIT(IS_S_RD_EVENT_SHIFT)) { + /* disable slave interrupts */ + val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); + val &= ~iproc_i2c->slave_int_mask; + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + + if (status & BIT(IS_S_RD_EVENT_SHIFT)) + /* Master-write-read request */ + iproc_i2c->slave_rx_only = false; + else + /* Master-write request only */ + iproc_i2c->slave_rx_only = true; + + /* schedule tasklet to read data later */ + tasklet_schedule(&iproc_i2c->slave_rx_tasklet); + + /* clear only IS_S_RX_EVENT_SHIFT interrupt */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, + BIT(IS_S_RX_EVENT_SHIFT)); + } + + if (status & BIT(IS_S_TX_UNDERRUN_SHIFT)) { + iproc_i2c->tx_underrun++; + if (iproc_i2c->tx_underrun == 1) + /* Start of SMBUS for Master Read */ i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_WRITE_RECEIVED, &value); - if (rx_status == I2C_SLAVE_RX_END) - i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_STOP, &value); - } - } else if (status & BIT(IS_S_TX_UNDERRUN_SHIFT)) { - /* Master read other than start */ - i2c_slave_event(iproc_i2c->slave, - I2C_SLAVE_READ_PROCESSED, &value); + I2C_SLAVE_READ_REQUESTED, + &value); + else + /* Master read other than start */ + i2c_slave_event(iproc_i2c->slave, + I2C_SLAVE_READ_PROCESSED, + &value); iproc_i2c_wr_reg(iproc_i2c, S_TX_OFFSET, value); + /* start transfer */ val = BIT(S_CMD_START_BUSY_SHIFT); iproc_i2c_wr_reg(iproc_i2c, S_CMD_OFFSET, val); + + /* clear interrupt */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, + BIT(IS_S_TX_UNDERRUN_SHIFT)); } - /* Stop */ + /* Stop received from master in case of master read transaction */ if (status & BIT(IS_S_START_BUSY_SHIFT)) { - i2c_slave_event(iproc_i2c->slave, I2C_SLAVE_STOP, &value); /* * Enable interrupt for TX FIFO becomes empty and * less than PKT_LENGTH bytes were output on the SMBUS */ - val = iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET); - val &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); - iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, val); + iproc_i2c->slave_int_mask &= ~BIT(IE_S_TX_UNDERRUN_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, + iproc_i2c->slave_int_mask); + + /* End of SMBUS for Master Read */ + val = BIT(S_TX_WR_STATUS_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, S_TX_OFFSET, val); + + val = BIT(S_CMD_START_BUSY_SHIFT); + iproc_i2c_wr_reg(iproc_i2c, S_CMD_OFFSET, val); + + /* flush TX FIFOs */ + val = iproc_i2c_rd_reg(iproc_i2c, S_FIFO_CTRL_OFFSET); + val |= (BIT(S_FIFO_TX_FLUSH_SHIFT)); + iproc_i2c_wr_reg(iproc_i2c, S_FIFO_CTRL_OFFSET, val); + + i2c_slave_event(iproc_i2c->slave, I2C_SLAVE_STOP, &value); + + /* clear interrupt */ + iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, + BIT(IS_S_START_BUSY_SHIFT)); } - /* clear interrupt status */ - iproc_i2c_wr_reg(iproc_i2c, IS_OFFSET, status); + /* check slave transmit status only if slave is transmitting */ + if (!iproc_i2c->slave_rx_only) + bcm_iproc_i2c_check_slave_status(iproc_i2c); - bcm_iproc_i2c_check_slave_status(iproc_i2c); return true; } @@ -505,12 +625,17 @@ static void bcm_iproc_i2c_process_m_event(struct bcm_iproc_i2c_dev *iproc_i2c, static irqreturn_t bcm_iproc_i2c_isr(int irq, void *data) { struct bcm_iproc_i2c_dev *iproc_i2c = data; - u32 status = iproc_i2c_rd_reg(iproc_i2c, IS_OFFSET); + u32 slave_status; + u32 status; bool ret; - u32 sl_status = status & ISR_MASK_SLAVE; - if (sl_status) { - ret = bcm_iproc_i2c_slave_isr(iproc_i2c, sl_status); + status = iproc_i2c_rd_reg(iproc_i2c, IS_OFFSET); + /* process only slave interrupt which are enabled */ + slave_status = status & iproc_i2c_rd_reg(iproc_i2c, IE_OFFSET) & + ISR_MASK_SLAVE; + + if (slave_status) { + ret = bcm_iproc_i2c_slave_isr(iproc_i2c, slave_status); if (ret) return IRQ_HANDLED; else @@ -1066,6 +1191,10 @@ static int bcm_iproc_i2c_reg_slave(struct i2c_client *slave) return -EAFNOSUPPORT; iproc_i2c->slave = slave; + + tasklet_init(&iproc_i2c->slave_rx_tasklet, slave_rx_tasklet_fn, + (unsigned long)iproc_i2c); + bcm_iproc_i2c_slave_init(iproc_i2c, false); return 0; } @@ -1086,6 +1215,8 @@ static int bcm_iproc_i2c_unreg_slave(struct i2c_client *slave) IE_S_ALL_INTERRUPT_SHIFT); iproc_i2c_wr_reg(iproc_i2c, IE_OFFSET, tmp); + tasklet_kill(&iproc_i2c->slave_rx_tasklet); + /* Erase the slave address programmed */ tmp = iproc_i2c_rd_reg(iproc_i2c, S_CFG_SMBUS_ADDR_OFFSET); tmp &= ~BIT(S_CFG_EN_NIC_SMB_ADDR3_SHIFT); diff --git a/drivers/i2c/busses/i2c-brcmstb.c b/drivers/i2c/busses/i2c-brcmstb.c index d4e0a0f6732a..ba766d24219e 100644 --- a/drivers/i2c/busses/i2c-brcmstb.c +++ b/drivers/i2c/busses/i2c-brcmstb.c @@ -316,7 +316,7 @@ static int brcmstb_send_i2c_cmd(struct brcmstb_i2c_dev *dev, goto cmd_out; } - if ((CMD_RD || CMD_WR) && + if ((cmd == CMD_RD || cmd == CMD_WR) && bsc_readl(dev, iic_enable) & BSC_IIC_EN_NOACK_MASK) { rc = -EREMOTEIO; dev_dbg(dev->device, "controller received NOACK intr for %s\n", diff --git a/drivers/i2c/busses/i2c-exynos5.c b/drivers/i2c/busses/i2c-exynos5.c index 20a9881a0d6c..5ac30d95650c 100644 --- a/drivers/i2c/busses/i2c-exynos5.c +++ b/drivers/i2c/busses/i2c-exynos5.c @@ -606,6 +606,7 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop) u32 i2c_ctl; u32 int_en = 0; u32 i2c_auto_conf = 0; + u32 i2c_addr = 0; u32 fifo_ctl; unsigned long flags; unsigned short trig_lvl; @@ -640,7 +641,12 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop) int_en |= HSI2C_INT_TX_ALMOSTEMPTY_EN; } - writel(HSI2C_SLV_ADDR_MAS(i2c->msg->addr), i2c->regs + HSI2C_ADDR); + i2c_addr = HSI2C_SLV_ADDR_MAS(i2c->msg->addr); + + if (i2c->op_clock >= I2C_MAX_FAST_MODE_PLUS_FREQ) + i2c_addr |= HSI2C_MASTER_ID(MASTER_ID(i2c->adap.nr)); + + writel(i2c_addr, i2c->regs + HSI2C_ADDR); writel(fifo_ctl, i2c->regs + HSI2C_FIFO_CTL); writel(i2c_ctl, i2c->regs + HSI2C_CTL); diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 046d241183c5..214b4c913a13 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -86,6 +86,9 @@ struct geni_i2c_dev { u32 clk_freq_out; const struct geni_i2c_clk_fld *clk_fld; int suspended; + void *dma_buf; + size_t xfer_len; + dma_addr_t dma_addr; }; struct geni_i2c_err_log { @@ -348,14 +351,39 @@ static void geni_i2c_tx_fsm_rst(struct geni_i2c_dev *gi2c) dev_err(gi2c->se.dev, "Timeout resetting TX_FSM\n"); } +static void geni_i2c_rx_msg_cleanup(struct geni_i2c_dev *gi2c, + struct i2c_msg *cur) +{ + gi2c->cur_rd = 0; + if (gi2c->dma_buf) { + if (gi2c->err) + geni_i2c_rx_fsm_rst(gi2c); + geni_se_rx_dma_unprep(&gi2c->se, gi2c->dma_addr, gi2c->xfer_len); + i2c_put_dma_safe_msg_buf(gi2c->dma_buf, cur, !gi2c->err); + } +} + +static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c, + struct i2c_msg *cur) +{ + gi2c->cur_wr = 0; + if (gi2c->dma_buf) { + if (gi2c->err) + geni_i2c_tx_fsm_rst(gi2c); + geni_se_tx_dma_unprep(&gi2c->se, gi2c->dma_addr, gi2c->xfer_len); + i2c_put_dma_safe_msg_buf(gi2c->dma_buf, cur, !gi2c->err); + } +} + static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, u32 m_param) { - dma_addr_t rx_dma; + dma_addr_t rx_dma = 0; unsigned long time_left; void *dma_buf; struct geni_se *se = &gi2c->se; size_t len = msg->len; + struct i2c_msg *cur; dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); if (dma_buf) @@ -370,19 +398,18 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, geni_se_select_mode(se, GENI_SE_FIFO); i2c_put_dma_safe_msg_buf(dma_buf, msg, false); dma_buf = NULL; + } else { + gi2c->xfer_len = len; + gi2c->dma_addr = rx_dma; + gi2c->dma_buf = dma_buf; } + cur = gi2c->cur; time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); if (!time_left) geni_i2c_abort_xfer(gi2c); - gi2c->cur_rd = 0; - if (dma_buf) { - if (gi2c->err) - geni_i2c_rx_fsm_rst(gi2c); - geni_se_rx_dma_unprep(se, rx_dma, len); - i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); - } + geni_i2c_rx_msg_cleanup(gi2c, cur); return gi2c->err; } @@ -390,11 +417,12 @@ static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, u32 m_param) { - dma_addr_t tx_dma; + dma_addr_t tx_dma = 0; unsigned long time_left; void *dma_buf; struct geni_se *se = &gi2c->se; size_t len = msg->len; + struct i2c_msg *cur; dma_buf = i2c_get_dma_safe_msg_buf(msg, 32); if (dma_buf) @@ -409,22 +437,21 @@ static int geni_i2c_tx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg, geni_se_select_mode(se, GENI_SE_FIFO); i2c_put_dma_safe_msg_buf(dma_buf, msg, false); dma_buf = NULL; + } else { + gi2c->xfer_len = len; + gi2c->dma_addr = tx_dma; + gi2c->dma_buf = dma_buf; } if (!dma_buf) /* Get FIFO IRQ */ writel_relaxed(1, se->base + SE_GENI_TX_WATERMARK_REG); + cur = gi2c->cur; time_left = wait_for_completion_timeout(&gi2c->done, XFER_TIMEOUT); if (!time_left) geni_i2c_abort_xfer(gi2c); - gi2c->cur_wr = 0; - if (dma_buf) { - if (gi2c->err) - geni_i2c_tx_fsm_rst(gi2c); - geni_se_tx_dma_unprep(se, tx_dma, len); - i2c_put_dma_safe_msg_buf(dma_buf, msg, !gi2c->err); - } + geni_i2c_tx_msg_cleanup(gi2c, cur); return gi2c->err; } diff --git a/drivers/i2c/busses/i2c-rcar.c b/drivers/i2c/busses/i2c-rcar.c index 217def2d7cb4..ad6630e3cc77 100644 --- a/drivers/i2c/busses/i2c-rcar.c +++ b/drivers/i2c/busses/i2c-rcar.c @@ -91,7 +91,6 @@ #define RCAR_BUS_PHASE_START (MDBS | MIE | ESG) #define RCAR_BUS_PHASE_DATA (MDBS | MIE) -#define RCAR_BUS_MASK_DATA (~(ESG | FSB) & 0xFF) #define RCAR_BUS_PHASE_STOP (MDBS | MIE | FSB) #define RCAR_IRQ_SEND (MNR | MAL | MST | MAT | MDE) @@ -120,6 +119,7 @@ enum rcar_i2c_type { }; struct rcar_i2c_priv { + u32 flags; void __iomem *io; struct i2c_adapter adap; struct i2c_msg *msg; @@ -130,7 +130,6 @@ struct rcar_i2c_priv { int pos; u32 icccr; - u32 flags; u8 recovery_icmcr; /* protected by adapter lock */ enum rcar_i2c_type devtype; struct i2c_client *slave; @@ -621,7 +620,7 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) /* * This driver has a lock-free design because there are IP cores (at least * R-Car Gen2) which have an inherent race condition in their hardware design. - * There, we need to clear RCAR_BUS_MASK_DATA bits as soon as possible after + * There, we need to switch to RCAR_BUS_PHASE_DATA as soon as possible after * the interrupt was generated, otherwise an unwanted repeated message gets * generated. It turned out that taking a spinlock at the beginning of the ISR * was already causing repeated messages. Thus, this driver was converted to @@ -630,13 +629,11 @@ static bool rcar_i2c_slave_irq(struct rcar_i2c_priv *priv) static irqreturn_t rcar_i2c_irq(int irq, void *ptr) { struct rcar_i2c_priv *priv = ptr; - u32 msr, val; + u32 msr; /* Clear START or STOP immediately, except for REPSTART after read */ - if (likely(!(priv->flags & ID_P_REP_AFTER_RD))) { - val = rcar_i2c_read(priv, ICMCR); - rcar_i2c_write(priv, ICMCR, val & RCAR_BUS_MASK_DATA); - } + if (likely(!(priv->flags & ID_P_REP_AFTER_RD))) + rcar_i2c_write(priv, ICMCR, RCAR_BUS_PHASE_DATA); msr = rcar_i2c_read(priv, ICMSR); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 63ebf722a424..b4c0747c949b 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1387,7 +1387,7 @@ int i2c_handle_smbus_host_notify(struct i2c_adapter *adap, unsigned short addr) if (irq <= 0) return -ENXIO; - generic_handle_irq(irq); + generic_dispatch_irq(irq); return 0; } diff --git a/drivers/i3c/master/Kconfig b/drivers/i3c/master/Kconfig index e68f15f4b4d0..afff0e2320f7 100644 --- a/drivers/i3c/master/Kconfig +++ b/drivers/i3c/master/Kconfig @@ -25,6 +25,7 @@ config DW_I3C_MASTER config MIPI_I3C_HCI tristate "MIPI I3C Host Controller Interface driver (EXPERIMENTAL)" depends on I3C + depends on HAS_IOMEM help Support for hardware following the MIPI Aliance's I3C Host Controller Interface specification. diff --git a/drivers/ide/falconide.c b/drivers/ide/falconide.c index 77af4c1a3f38..bb86d84558d9 100644 --- a/drivers/ide/falconide.c +++ b/drivers/ide/falconide.c @@ -164,6 +164,7 @@ static int __init falconide_init(struct platform_device *pdev) if (rc) goto err_free; + platform_set_drvdata(pdev, host); return 0; err_free: ide_host_free(host); @@ -174,7 +175,7 @@ static int __init falconide_init(struct platform_device *pdev) static int falconide_remove(struct platform_device *pdev) { - struct ide_host *host = dev_get_drvdata(&pdev->dev); + struct ide_host *host = platform_get_drvdata(pdev); ide_host_remove(host); diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig index 15587a1bc80d..be1f73166a32 100644 --- a/drivers/iio/adc/Kconfig +++ b/drivers/iio/adc/Kconfig @@ -266,6 +266,8 @@ config ADI_AXI_ADC select IIO_BUFFER select IIO_BUFFER_HW_CONSUMER select IIO_BUFFER_DMAENGINE + depends on HAS_IOMEM + depends on OF help Say yes here to build support for Analog Devices Generic AXI ADC IP core. The IP core is used for interfacing with @@ -923,6 +925,7 @@ config STM32_ADC_CORE depends on ARCH_STM32 || COMPILE_TEST depends on OF depends on REGULATOR + depends on HAS_IOMEM select IIO_BUFFER select MFD_STM32_TIMERS select IIO_STM32_TIMER_TRIGGER diff --git a/drivers/iio/adc/ab8500-gpadc.c b/drivers/iio/adc/ab8500-gpadc.c index 1bb987a4acba..8d81505282dd 100644 --- a/drivers/iio/adc/ab8500-gpadc.c +++ b/drivers/iio/adc/ab8500-gpadc.c @@ -918,7 +918,7 @@ static int ab8500_gpadc_read_raw(struct iio_dev *indio_dev, return processed; /* Return millivolt or milliamps or millicentigrades */ - *val = processed * 1000; + *val = processed; return IIO_VAL_INT; } diff --git a/drivers/iio/adc/ad7949.c b/drivers/iio/adc/ad7949.c index 5d597e5050f6..1b4b3203e428 100644 --- a/drivers/iio/adc/ad7949.c +++ b/drivers/iio/adc/ad7949.c @@ -91,7 +91,7 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val, int ret; int i; int bits_per_word = ad7949_adc->resolution; - int mask = GENMASK(ad7949_adc->resolution, 0); + int mask = GENMASK(ad7949_adc->resolution - 1, 0); struct spi_message msg; struct spi_transfer tx[] = { { diff --git a/drivers/iio/adc/qcom-spmi-vadc.c b/drivers/iio/adc/qcom-spmi-vadc.c index b0388f8a69f4..7e7d408452ec 100644 --- a/drivers/iio/adc/qcom-spmi-vadc.c +++ b/drivers/iio/adc/qcom-spmi-vadc.c @@ -598,7 +598,7 @@ static const struct vadc_channels vadc_chans[] = { VADC_CHAN_NO_SCALE(P_MUX16_1_3, 1) VADC_CHAN_NO_SCALE(LR_MUX1_BAT_THERM, 0) - VADC_CHAN_NO_SCALE(LR_MUX2_BAT_ID, 0) + VADC_CHAN_VOLT(LR_MUX2_BAT_ID, 0, SCALE_DEFAULT) VADC_CHAN_NO_SCALE(LR_MUX3_XO_THERM, 0) VADC_CHAN_NO_SCALE(LR_MUX4_AMUX_THM1, 0) VADC_CHAN_NO_SCALE(LR_MUX5_AMUX_THM2, 0) diff --git a/drivers/iio/gyro/mpu3050-core.c b/drivers/iio/gyro/mpu3050-core.c index dfa31a23500f..ac90be03332a 100644 --- a/drivers/iio/gyro/mpu3050-core.c +++ b/drivers/iio/gyro/mpu3050-core.c @@ -551,6 +551,8 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p) MPU3050_FIFO_R, &fifo_values[offset], toread); + if (ret) + goto out_trigger_unlock; dev_dbg(mpu3050->dev, "%04x %04x %04x %04x %04x\n", diff --git a/drivers/iio/humidity/hid-sensor-humidity.c b/drivers/iio/humidity/hid-sensor-humidity.c index 52f605114ef7..d62705448ae2 100644 --- a/drivers/iio/humidity/hid-sensor-humidity.c +++ b/drivers/iio/humidity/hid-sensor-humidity.c @@ -15,7 +15,10 @@ struct hid_humidity_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info humidity_attr; - s32 humidity_data; + struct { + s32 humidity_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -125,9 +128,8 @@ static int humidity_proc_event(struct hid_sensor_hub_device *hsdev, struct hid_humidity_state *humid_st = iio_priv(indio_dev); if (atomic_read(&humid_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &humid_st->humidity_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &humid_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -142,7 +144,7 @@ static int humidity_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY: - humid_st->humidity_data = *(s32 *)raw_data; + humid_st->scan.humidity_data = *(s32 *)raw_data; return 0; default: diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c index 54af2ed664f6..785a4ce606d8 100644 --- a/drivers/iio/imu/adis16400.c +++ b/drivers/iio/imu/adis16400.c @@ -462,8 +462,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev) if (ret) goto err_ret; - ret = sscanf(indio_dev->name, "adis%u\n", &device_id); - if (ret != 1) { + if (sscanf(indio_dev->name, "adis%u\n", &device_id) != 1) { ret = -EINVAL; goto err_ret; } diff --git a/drivers/iio/light/hid-sensor-prox.c b/drivers/iio/light/hid-sensor-prox.c index 330cf359e0b8..e9e00ce0c6d4 100644 --- a/drivers/iio/light/hid-sensor-prox.c +++ b/drivers/iio/light/hid-sensor-prox.c @@ -23,6 +23,9 @@ struct prox_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info prox_attr; u32 human_presence; + int scale_pre_decml; + int scale_post_decml; + int scale_precision; }; /* Channel definitions */ @@ -93,8 +96,9 @@ static int prox_read_raw(struct iio_dev *indio_dev, ret_type = IIO_VAL_INT; break; case IIO_CHAN_INFO_SCALE: - *val = prox_state->prox_attr.units; - ret_type = IIO_VAL_INT; + *val = prox_state->scale_pre_decml; + *val2 = prox_state->scale_post_decml; + ret_type = prox_state->scale_precision; break; case IIO_CHAN_INFO_OFFSET: *val = hid_sensor_convert_exponent( @@ -234,6 +238,11 @@ static int prox_parse_report(struct platform_device *pdev, HID_USAGE_SENSOR_HUMAN_PRESENCE, &st->common_attributes.sensitivity); + st->scale_precision = hid_sensor_format_scale( + hsdev->usage, + &st->prox_attr, + &st->scale_pre_decml, &st->scale_post_decml); + return ret; } diff --git a/drivers/iio/temperature/hid-sensor-temperature.c b/drivers/iio/temperature/hid-sensor-temperature.c index 81688f1b932f..da9a247097fa 100644 --- a/drivers/iio/temperature/hid-sensor-temperature.c +++ b/drivers/iio/temperature/hid-sensor-temperature.c @@ -15,7 +15,10 @@ struct temperature_state { struct hid_sensor_common common_attributes; struct hid_sensor_hub_attribute_info temperature_attr; - s32 temperature_data; + struct { + s32 temperature_data; + u64 timestamp __aligned(8); + } scan; int scale_pre_decml; int scale_post_decml; int scale_precision; @@ -32,7 +35,7 @@ static const struct iio_chan_spec temperature_channels[] = { BIT(IIO_CHAN_INFO_SAMP_FREQ) | BIT(IIO_CHAN_INFO_HYSTERESIS), }, - IIO_CHAN_SOFT_TIMESTAMP(3), + IIO_CHAN_SOFT_TIMESTAMP(1), }; /* Adjust channel real bits based on report descriptor */ @@ -123,9 +126,8 @@ static int temperature_proc_event(struct hid_sensor_hub_device *hsdev, struct temperature_state *temp_st = iio_priv(indio_dev); if (atomic_read(&temp_st->common_attributes.data_ready)) - iio_push_to_buffers_with_timestamp(indio_dev, - &temp_st->temperature_data, - iio_get_time_ns(indio_dev)); + iio_push_to_buffers_with_timestamp(indio_dev, &temp_st->scan, + iio_get_time_ns(indio_dev)); return 0; } @@ -140,7 +142,7 @@ static int temperature_capture_sample(struct hid_sensor_hub_device *hsdev, switch (usage_id) { case HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE: - temp_st->temperature_data = *(s32 *)raw_data; + temp_st->scan.temperature_data = *(s32 *)raw_data; return 0; default: return -EINVAL; diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 98165589c8ab..3d194bb60840 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, struct ib_cm_sidr_rep_param *param) { struct ib_mad_send_buf *msg; + unsigned long flags; int ret; lockdep_assert_held(&cm_id_priv->lock); @@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv, return ret; } cm_id_priv->id.state = IB_CM_IDLE; - spin_lock_irq(&cm.lock); + spin_lock_irqsave(&cm.lock, flags); if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) { rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table); RB_CLEAR_NODE(&cm_id_priv->sidr_id_node); } - spin_unlock_irq(&cm.lock); + spin_unlock_irqrestore(&cm.lock, flags); return 0; } @@ -4333,7 +4334,7 @@ static int cm_add_one(struct ib_device *ib_device) unsigned long flags; int ret; int count = 0; - u8 i; + unsigned int i; cm_dev = kzalloc(struct_size(cm_dev, port, ib_device->phys_port_cnt), GFP_KERNEL); @@ -4345,7 +4346,7 @@ static int cm_add_one(struct ib_device *ib_device) cm_dev->going_down = 0; set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; @@ -4431,7 +4432,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) .clr_port_cap_mask = IB_PORT_CM_SUP }; unsigned long flags; - int i; + unsigned int i; write_lock_irqsave(&cm.device_lock, flags); list_del(&cm_dev->list); @@ -4441,7 +4442,7 @@ static void cm_remove_one(struct ib_device *ib_device, void *client_data) cm_dev->going_down = 1; spin_unlock_irq(&cm.lock); - for (i = 1; i <= ib_device->phys_port_cnt; i++) { + rdma_for_each_port (ib_device, i) { if (!rdma_cap_ib_cm(ib_device, i)) continue; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index c51b84b2d2f3..e3638f80e1d5 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -352,7 +352,13 @@ struct ib_device *cma_get_ib_dev(struct cma_device *cma_dev) struct cma_multicast { struct rdma_id_private *id_priv; - struct ib_sa_multicast *sa_mc; + union { + struct ib_sa_multicast *sa_mc; + struct { + struct work_struct work; + struct rdma_cm_event event; + } iboe_join; + }; struct list_head list; void *context; struct sockaddr_storage addr; @@ -1823,6 +1829,8 @@ static void destroy_mc(struct rdma_id_private *id_priv, cma_igmp_send(ndev, &mgid, false); dev_put(ndev); } + + cancel_work_sync(&mc->iboe_join.work); } kfree(mc); } @@ -2683,6 +2691,28 @@ static int cma_query_ib_route(struct rdma_id_private *id_priv, return (id_priv->query_id < 0) ? id_priv->query_id : 0; } +static void cma_iboe_join_work_handler(struct work_struct *work) +{ + struct cma_multicast *mc = + container_of(work, struct cma_multicast, iboe_join.work); + struct rdma_cm_event *event = &mc->iboe_join.event; + struct rdma_id_private *id_priv = mc->id_priv; + int ret; + + mutex_lock(&id_priv->handler_mutex); + if (READ_ONCE(id_priv->state) == RDMA_CM_DESTROYING || + READ_ONCE(id_priv->state) == RDMA_CM_DEVICE_REMOVAL) + goto out_unlock; + + ret = cma_cm_event_handler(id_priv, event); + WARN_ON(ret); + +out_unlock: + mutex_unlock(&id_priv->handler_mutex); + if (event->event == RDMA_CM_EVENT_MULTICAST_JOIN) + rdma_destroy_ah_attr(&event->param.ud.ah_attr); +} + static void cma_work_handler(struct work_struct *_work) { struct cma_work *work = container_of(_work, struct cma_work, work); @@ -4478,10 +4508,7 @@ static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast) cma_make_mc_event(status, id_priv, multicast, &event, mc); ret = cma_cm_event_handler(id_priv, &event); rdma_destroy_ah_attr(&event.param.ud.ah_attr); - if (ret) { - destroy_id_handler_unlock(id_priv); - return 0; - } + WARN_ON(ret); out: mutex_unlock(&id_priv->handler_mutex); @@ -4604,7 +4631,6 @@ static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, struct cma_multicast *mc) { - struct cma_work *work; struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr; int err = 0; struct sockaddr *addr = (struct sockaddr *)&mc->addr; @@ -4618,10 +4644,6 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (cma_zero_addr(addr)) return -EINVAL; - work = kzalloc(sizeof *work, GFP_KERNEL); - if (!work) - return -ENOMEM; - gid_type = id_priv->cma_dev->default_gid_type[id_priv->id.port_num - rdma_start_port(id_priv->cma_dev->device)]; cma_iboe_set_mgid(addr, &ib.rec.mgid, gid_type); @@ -4632,10 +4654,9 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, if (dev_addr->bound_dev_if) ndev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); - if (!ndev) { - err = -ENODEV; - goto err_free; - } + if (!ndev) + return -ENODEV; + ib.rec.rate = iboe_get_rate(ndev); ib.rec.hop_limit = 1; ib.rec.mtu = iboe_get_mtu(ndev->mtu); @@ -4653,24 +4674,15 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv, err = -ENOTSUPP; } dev_put(ndev); - if (err || !ib.rec.mtu) { - if (!err) - err = -EINVAL; - goto err_free; - } + if (err || !ib.rec.mtu) + return err ?: -EINVAL; + rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr, &ib.rec.port_gid); - work->id = id_priv; - INIT_WORK(&work->work, cma_work_handler); - cma_make_mc_event(0, id_priv, &ib, &work->event, mc); - /* Balances with cma_id_put() in cma_work_handler */ - cma_id_get(id_priv); - queue_work(cma_wq, &work->work); + INIT_WORK(&mc->iboe_join.work, cma_iboe_join_work_handler); + cma_make_mc_event(0, id_priv, &ib, &mc->iboe_join.event, mc); + queue_work(cma_wq, &mc->iboe_join.work); return 0; - -err_free: - kfree(work); - return err; } int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr, diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c index 19104a675691..dd7f3b437c6b 100644 --- a/drivers/infiniband/core/user_mad.c +++ b/drivers/infiniband/core/user_mad.c @@ -379,6 +379,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + while (list_empty(&file->recv_list)) { mutex_unlock(&file->mutex); @@ -392,6 +397,11 @@ static ssize_t ib_umad_read(struct file *filp, char __user *buf, mutex_lock(&file->mutex); } + if (file->agents_dead) { + mutex_unlock(&file->mutex); + return -EIO; + } + packet = list_entry(file->recv_list.next, struct ib_umad_packet, list); list_del(&packet->list); @@ -524,7 +534,7 @@ static ssize_t ib_umad_write(struct file *filp, const char __user *buf, agent = __get_agent(file, packet->mad.hdr.id); if (!agent) { - ret = -EINVAL; + ret = -EIO; goto err_up; } @@ -653,10 +663,14 @@ static __poll_t ib_umad_poll(struct file *filp, struct poll_table_struct *wait) /* we will always be able to post a MAD send */ __poll_t mask = EPOLLOUT | EPOLLWRNORM; + mutex_lock(&file->mutex); poll_wait(filp, &file->recv_wait, wait); if (!list_empty(&file->recv_list)) mask |= EPOLLIN | EPOLLRDNORM; + if (file->agents_dead) + mask = EPOLLERR; + mutex_unlock(&file->mutex); return mask; } @@ -1336,6 +1350,7 @@ static void ib_umad_kill_port(struct ib_umad_port *port) list_for_each_entry(file, &port->file_list, port_list) { mutex_lock(&file->mutex); file->agents_dead = 1; + wake_up_interruptible(&file->recv_wait); mutex_unlock(&file->mutex); for (id = 0; id < IB_UMAD_MAX_AGENTS; ++id) diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 8769e7aa097f..81903749d241 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -3610,13 +3610,13 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id) ep->com.local_addr.ss_family == AF_INET) { err = cxgb4_remove_server_filter( ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.dev->rdev.lldi.rxq_ids[0], 0); + ep->com.dev->rdev.lldi.rxq_ids[0], false); } else { struct sockaddr_in6 *sin6; c4iw_init_wr_wait(ep->com.wr_waitp); err = cxgb4_remove_server( ep->com.dev->rdev.lldi.ports[0], ep->stid, - ep->com.dev->rdev.lldi.rxq_ids[0], 0); + ep->com.dev->rdev.lldi.rxq_ids[0], true); if (err) goto done; err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp, diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index ad8253245a85..54abe615b502 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -54,6 +54,7 @@ /* Hardware specification only for v1 engine */ #define HNS_ROCE_MIN_CQE_NUM 0x40 #define HNS_ROCE_MIN_WQE_NUM 0x20 +#define HNS_ROCE_MIN_SRQ_WQE_NUM 1 /* Hardware specification only for v1 engine */ #define HNS_ROCE_MAX_INNER_MTPT_NUM 0x7 @@ -65,6 +66,8 @@ #define HNS_ROCE_CQE_WCMD_EMPTY_BIT 0x2 #define HNS_ROCE_MIN_CQE_CNT 16 +#define HNS_ROCE_RESERVED_SGE 1 + #define HNS_ROCE_MAX_IRQ_NUM 128 #define HNS_ROCE_SGE_IN_WQE 2 @@ -393,6 +396,7 @@ struct hns_roce_wq { spinlock_t lock; u32 wqe_cnt; /* WQE num */ u32 max_gs; + u32 rsv_sge; int offset; int wqe_shift; /* WQE size */ u32 head; @@ -489,6 +493,8 @@ struct hns_roce_idx_que { struct hns_roce_mtr mtr; int entry_shift; unsigned long *bitmap; + u32 head; + u32 tail; }; struct hns_roce_srq { @@ -496,6 +502,7 @@ struct hns_roce_srq { unsigned long srqn; u32 wqe_cnt; int max_gs; + u32 rsv_sge; int wqe_shift; void __iomem *db_reg_l; @@ -507,8 +514,6 @@ struct hns_roce_srq { u64 *wrid; struct hns_roce_idx_que idx_que; spinlock_t lock; - u16 head; - u16 tail; struct mutex mutex; void (*event)(struct hns_roce_srq *srq, enum hns_roce_event event); }; @@ -647,7 +652,7 @@ struct hns_roce_qp { struct hns_roce_db sdb; unsigned long en_flags; u32 doorbell_qpn; - u32 sq_signal_bits; + enum ib_sig_type sq_signal_bits; struct hns_roce_wq sq; struct hns_roce_mtr mtr; diff --git a/drivers/infiniband/hw/hns/hns_roce_hem.c b/drivers/infiniband/hw/hns/hns_roce_hem.c index edc9d6b98d95..cfd2e1b60c7f 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hem.c +++ b/drivers/infiniband/hw/hns/hns_roce_hem.c @@ -1075,9 +1075,8 @@ static struct roce_hem_item *hem_list_alloc_item(struct hns_roce_dev *hr_dev, return NULL; if (exist_bt) { - hem->addr = dma_alloc_coherent(hr_dev->dev, - count * BA_BYTE_LEN, - &hem->dma_addr, GFP_KERNEL); + hem->addr = dma_alloc_coherent(hr_dev->dev, count * BA_BYTE_LEN, + &hem->dma_addr, GFP_KERNEL); if (!hem->addr) { kfree(hem); return NULL; @@ -1336,6 +1335,10 @@ static int hem_list_alloc_root_bt(struct hns_roce_dev *hr_dev, if (ba_num < 1) return -ENOMEM; + if (ba_num > unit) + return -ENOBUFS; + + ba_num = min_t(int, ba_num, unit); INIT_LIST_HEAD(&temp_root); offset = r->offset; /* indicate to last region */ diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c index f68585ff8e8a..c2539a8d9111 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v1.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v1.c @@ -43,6 +43,22 @@ #include "hns_roce_hem.h" #include "hns_roce_hw_v1.h" +/** + * hns_get_gid_index - Get gid index. + * @hr_dev: pointer to structure hns_roce_dev. + * @port: port, value range: 0 ~ MAX + * @gid_index: gid_index, value range: 0 ~ MAX + * Description: + * N ports shared gids, allocation method as follow: + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * GID[0][0], GID[1][0],.....GID[N - 1][0], + * And so on + */ +u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) +{ + return gid_index * hr_dev->caps.num_ports + port; +} + static void set_data_seg(struct hns_roce_wqe_data_seg *dseg, struct ib_sge *sg) { dseg->lkey = cpu_to_le32(sg->lkey); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 833e1f259936..0f76e193317e 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -741,6 +741,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, unsigned long flags; void *wqe = NULL; u32 wqe_idx; + u32 max_sge; int nreq; int ret; int i; @@ -754,6 +755,7 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, goto out; } + max_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(hns_roce_wq_overflow(&hr_qp->rq, nreq, hr_qp->ibqp.recv_cq))) { @@ -764,9 +766,9 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, wqe_idx = (hr_qp->rq.head + nreq) & (hr_qp->rq.wqe_cnt - 1); - if (unlikely(wr->num_sge > hr_qp->rq.max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { ibdev_err(ibdev, "num_sge = %d >= max_sge = %u.\n", - wr->num_sge, hr_qp->rq.max_gs); + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; goto out; @@ -781,9 +783,10 @@ static int hns_roce_v2_post_recv(struct ib_qp *ibqp, dseg++; } - if (wr->num_sge < hr_qp->rq.max_gs) { + if (hr_qp->rq.rsv_sge) { dseg->lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg->addr = 0; + dseg->len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); } /* rq support inline data */ @@ -846,11 +849,20 @@ static void hns_roce_free_srq_wqe(struct hns_roce_srq *srq, int wqe_index) spin_lock(&srq->lock); bitmap_clear(srq->idx_que.bitmap, wqe_index, 1); - srq->tail++; + srq->idx_que.tail++; spin_unlock(&srq->lock); } +int hns_roce_srqwq_overflow(struct hns_roce_srq *srq, int nreq) +{ + struct hns_roce_idx_que *idx_que = &srq->idx_que; + unsigned int cur; + + cur = idx_que->head - idx_que->tail; + return cur + nreq >= srq->wqe_cnt; +} + static int find_empty_entry(struct hns_roce_idx_que *idx_que, unsigned long size) { @@ -879,22 +891,27 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, __le32 *srq_idx; int ret = 0; int wqe_idx; + u32 max_sge; void *wqe; int nreq; int i; spin_lock_irqsave(&srq->lock, flags); - ind = srq->head & (srq->wqe_cnt - 1); + ind = srq->idx_que.head & (srq->wqe_cnt - 1); + max_sge = srq->max_gs - srq->rsv_sge; for (nreq = 0; wr; ++nreq, wr = wr->next) { - if (unlikely(wr->num_sge >= srq->max_gs)) { + if (unlikely(wr->num_sge > max_sge)) { + ibdev_err(&hr_dev->ib_dev, + "srq: num_sge = %d, max_sge = %u.\n", + wr->num_sge, max_sge); ret = -EINVAL; *bad_wr = wr; break; } - if (unlikely(srq->head == srq->tail)) { + if (unlikely(hns_roce_srqwq_overflow(srq, nreq))) { ret = -ENOMEM; *bad_wr = wr; break; @@ -916,9 +933,9 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, dseg[i].addr = cpu_to_le64(wr->sg_list[i].addr); } - if (wr->num_sge < srq->max_gs) { - dseg[i].len = 0; - dseg[i].lkey = cpu_to_le32(0x100); + if (srq->rsv_sge) { + dseg[i].len = cpu_to_le32(HNS_ROCE_INVALID_SGE_LENGTH); + dseg[i].lkey = cpu_to_le32(HNS_ROCE_INVALID_LKEY); dseg[i].addr = 0; } @@ -930,7 +947,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, } if (likely(nreq)) { - srq->head += nreq; + srq->idx_que.head += nreq; /* * Make sure that descriptors are written before @@ -942,7 +959,7 @@ static int hns_roce_v2_post_srq_recv(struct ib_srq *ibsrq, cpu_to_le32(HNS_ROCE_V2_SRQ_DB << V2_DB_BYTE_4_CMD_S | (srq->srqn & V2_DB_BYTE_4_TAG_M)); srq_db.parameter = - cpu_to_le32(srq->head & V2_DB_PARAMETER_IDX_M); + cpu_to_le32(srq->idx_que.head & V2_DB_PARAMETER_IDX_M); hns_roce_write64(hr_dev, (__le32 *)&srq_db, srq->db_reg_l); } @@ -1247,7 +1264,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, u32 timeout = 0; int handle = 0; u16 desc_ret; - int ret = 0; + int ret; int ntc; spin_lock_bh(&csq->lock); @@ -1292,15 +1309,14 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, if (hns_roce_cmq_csq_done(hr_dev)) { complete = true; handle = 0; + ret = 0; while (handle < num) { /* get the result of hardware write back */ desc_to_use = &csq->desc[ntc]; desc[handle] = *desc_to_use; dev_dbg(hr_dev->dev, "Get cmq desc:\n"); desc_ret = le16_to_cpu(desc[handle].retval); - if (desc_ret == CMD_EXEC_SUCCESS) - ret = 0; - else + if (unlikely(desc_ret != CMD_EXEC_SUCCESS)) ret = -EIO; priv->cmq.last_status = desc_ret; ntc++; @@ -1866,7 +1882,6 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags = HNS_ROCE_CAP_FLAG_REREG_MR | HNS_ROCE_CAP_FLAG_ROCE_V1_V2 | - HNS_ROCE_CAP_FLAG_RQ_INLINE | HNS_ROCE_CAP_FLAG_RECORD_DB | HNS_ROCE_CAP_FLAG_SQ_RECORD_DB; @@ -1999,10 +2014,12 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->max_sq_sg = le16_to_cpu(resp_a->max_sq_sg); caps->max_sq_inline = le16_to_cpu(resp_a->max_sq_inline); caps->max_rq_sg = le16_to_cpu(resp_a->max_rq_sg); + caps->max_rq_sg = roundup_pow_of_two(caps->max_rq_sg); caps->max_extend_sg = le32_to_cpu(resp_a->max_extend_sg); caps->num_qpc_timer = le16_to_cpu(resp_a->num_qpc_timer); caps->num_cqc_timer = le16_to_cpu(resp_a->num_cqc_timer); caps->max_srq_sges = le16_to_cpu(resp_a->max_srq_sges); + caps->max_srq_sges = roundup_pow_of_two(caps->max_srq_sges); caps->num_aeq_vectors = resp_a->num_aeq_vectors; caps->num_other_vectors = resp_a->num_other_vectors; caps->max_sq_desc_sz = resp_a->max_sq_desc_sz; @@ -4235,7 +4252,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, struct hns_roce_v2_qp_context *context, struct hns_roce_v2_qp_context *qpc_mask) { - const struct ib_global_route *grh = rdma_ah_read_grh(&attr->ah_attr); struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); struct ib_device *ibdev = &hr_dev->ib_dev; @@ -4243,7 +4259,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, dma_addr_t irrl_ba; enum ib_mtu mtu; u8 lp_pktn_ini; - u8 port_num; u64 *mtts; u8 *dmac; u8 *smac; @@ -4324,15 +4339,6 @@ static int modify_qp_init_to_rtr(struct ib_qp *ibqp, V2_QPC_BYTE_56_DQPN_M, V2_QPC_BYTE_56_DQPN_S, 0); } - /* Configure GID index */ - port_num = rdma_ah_get_port_num(&attr->ah_attr); - roce_set_field(context->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, - hns_get_gid_index(hr_dev, port_num - 1, - grh->sgid_index)); - roce_set_field(qpc_mask->byte_20_smac_sgid_idx, - V2_QPC_BYTE_20_SGID_IDX_M, V2_QPC_BYTE_20_SGID_IDX_S, 0); - memcpy(&(context->dmac), dmac, sizeof(u32)); roce_set_field(context->byte_52_udpspn_dmac, V2_QPC_BYTE_52_DMAC_M, V2_QPC_BYTE_52_DMAC_S, *((u16 *)(&dmac[4]))); @@ -5083,7 +5089,7 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, done: qp_attr->cur_qp_state = qp_attr->qp_state; qp_attr->cap.max_recv_wr = hr_qp->rq.wqe_cnt; - qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs; + qp_attr->cap.max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; if (!ibqp->uobject) { qp_attr->cap.max_send_wr = hr_qp->sq.wqe_cnt; @@ -5331,7 +5337,7 @@ static int hns_roce_v2_modify_srq(struct ib_srq *ibsrq, return -EINVAL; if (srq_attr_mask & IB_SRQ_LIMIT) { - if (srq_attr->srq_limit >= srq->wqe_cnt) + if (srq_attr->srq_limit > srq->wqe_cnt) return -EINVAL; mailbox = hns_roce_alloc_cmd_mailbox(hr_dev); @@ -5394,8 +5400,8 @@ static int hns_roce_v2_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) SRQC_BYTE_8_SRQ_LIMIT_WL_S); attr->srq_limit = limit_wl; - attr->max_wr = srq->wqe_cnt - 1; - attr->max_sge = srq->max_gs; + attr->max_wr = srq->wqe_cnt; + attr->max_sge = srq->max_gs - srq->rsv_sge; out: hns_roce_free_cmd_mailbox(hr_dev, mailbox); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index bdaccf86460d..09d88d97a7ff 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -96,7 +96,8 @@ #define HNS_ROCE_V2_CQC_TIMER_ENTRY_SZ PAGE_SIZE #define HNS_ROCE_V2_PAGE_SIZE_SUPPORTED 0xFFFFF000 #define HNS_ROCE_V2_MAX_INNER_MTPT_NUM 2 -#define HNS_ROCE_INVALID_LKEY 0x100 +#define HNS_ROCE_INVALID_LKEY 0x0 +#define HNS_ROCE_INVALID_SGE_LENGTH 0x80000000 #define HNS_ROCE_CMQ_TX_TIMEOUT 30000 #define HNS_ROCE_V2_UC_RC_SGE_NUM_IN_WQE 2 #define HNS_ROCE_V2_RSV_QPS 8 diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index d9179bae4989..baadb12b1375 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -42,22 +42,6 @@ #include "hns_roce_device.h" #include "hns_roce_hem.h" -/** - * hns_get_gid_index - Get gid index. - * @hr_dev: pointer to structure hns_roce_dev. - * @port: port, value range: 0 ~ MAX - * @gid_index: gid_index, value range: 0 ~ MAX - * Description: - * N ports shared gids, allocation method as follow: - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * GID[0][0], GID[1][0],.....GID[N - 1][0], - * And so on - */ -u8 hns_get_gid_index(struct hns_roce_dev *hr_dev, u8 port, int gid_index) -{ - return gid_index * hr_dev->caps.num_ports + port; -} - static int hns_roce_set_mac(struct hns_roce_dev *hr_dev, u8 port, u8 *addr) { u8 phy_port; @@ -772,8 +756,7 @@ static int hns_roce_setup_hca(struct hns_roce_dev *hr_dev) return 0; err_qp_table_free: - if (hr_dev->caps.flags & HNS_ROCE_CAP_FLAG_SRQ) - hns_roce_cleanup_qp_table(hr_dev); + hns_roce_cleanup_qp_table(hr_dev); err_cq_table_free: hns_roce_cleanup_cq_table(hr_dev); diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 1bcffd93ff3e..1e0465f05b7d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -631,30 +631,26 @@ int hns_roce_dealloc_mw(struct ib_mw *ibmw) } static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, - dma_addr_t *pages, struct hns_roce_buf_region *region) + struct hns_roce_buf_region *region, dma_addr_t *pages, + int max_count) { + int count, npage; + int offset, end; __le64 *mtts; - int offset; - int count; - int npage; u64 addr; - int end; int i; - /* if hopnum is 0, buffer cannot store BAs, so skip write mtt */ - if (!region->hopnum) - return 0; - offset = region->offset; end = offset + region->count; npage = 0; - while (offset < end) { + while (offset < end && npage < max_count) { + count = 0; mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list, offset, &count, NULL); if (!mtts) return -ENOBUFS; - for (i = 0; i < count; i++) { + for (i = 0; i < count && npage < max_count; i++) { if (hr_dev->hw_rev == HNS_ROCE_HW_VER1) addr = to_hr_hw_page_addr(pages[npage]); else @@ -666,7 +662,7 @@ static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, offset += count; } - return 0; + return npage; } static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr) @@ -833,8 +829,8 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, { struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_buf_region *r; - unsigned int i; - int err; + unsigned int i, mapped_cnt; + int ret; /* * Only use the first page address as root ba when hopnum is 0, this @@ -845,26 +841,42 @@ int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, return 0; } - for (i = 0; i < mtr->hem_cfg.region_count; i++) { + for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count && + mapped_cnt < page_cnt; i++) { r = &mtr->hem_cfg.region[i]; + /* if hopnum is 0, no need to map pages in this region */ + if (!r->hopnum) { + mapped_cnt += r->count; + continue; + } + if (r->offset + r->count > page_cnt) { - err = -EINVAL; + ret = -EINVAL; ibdev_err(ibdev, "failed to check mtr%u end %u + %u, max %u.\n", i, r->offset, r->count, page_cnt); - return err; + return ret; } - err = mtr_map_region(hr_dev, mtr, &pages[r->offset], r); - if (err) { + ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset], + page_cnt - mapped_cnt); + if (ret < 0) { ibdev_err(ibdev, "failed to map mtr%u offset %u, ret = %d.\n", - i, r->offset, err); - return err; + i, r->offset, ret); + return ret; } + mapped_cnt += ret; + ret = 0; } - return 0; + if (mapped_cnt < page_cnt) { + ret = -ENOBUFS; + ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n", + mapped_cnt, page_cnt); + } + + return ret; } int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr, diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index 1116371adf74..8695c96e6696 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -413,9 +413,32 @@ static void free_qpn(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp) mutex_unlock(&hr_dev->qp_table.bank_mutex); } +static u32 proc_rq_sge(struct hns_roce_dev *dev, struct hns_roce_qp *hr_qp, + bool user) +{ + u32 max_sge = dev->caps.max_rq_sg; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_qp->rq.rsv_sge = 1; + + return max_sge; +} + static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, - struct hns_roce_qp *hr_qp, int has_rq) + struct hns_roce_qp *hr_qp, int has_rq, bool user) { + u32 max_sge = proc_rq_sge(hr_dev, hr_qp, user); u32 cnt; /* If srq exist, set zero for relative number of rq */ @@ -431,8 +454,9 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, /* Check the validity of QP support capacity */ if (!cap->max_recv_wr || cap->max_recv_wr > hr_dev->caps.max_wqes || - cap->max_recv_sge > hr_dev->caps.max_rq_sg) { - ibdev_err(&hr_dev->ib_dev, "RQ config error, depth=%u, sge=%d\n", + cap->max_recv_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "RQ config error, depth = %u, sge = %u\n", cap->max_recv_wr, cap->max_recv_sge); return -EINVAL; } @@ -444,7 +468,8 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return -EINVAL; } - hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge)); + hr_qp->rq.max_gs = roundup_pow_of_two(max(1U, cap->max_recv_sge) + + hr_qp->rq.rsv_sge); if (hr_dev->caps.max_rq_sg <= HNS_ROCE_SGE_IN_WQE) hr_qp->rq.wqe_shift = ilog2(hr_dev->caps.max_rq_desc_sz); @@ -459,7 +484,7 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, hr_qp->rq_inl_buf.wqe_cnt = 0; cap->max_recv_wr = cnt; - cap->max_recv_sge = hr_qp->rq.max_gs; + cap->max_recv_sge = hr_qp->rq.max_gs - hr_qp->rq.rsv_sge; return 0; } @@ -919,7 +944,7 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, hr_qp->sq_signal_bits = IB_SIGNAL_REQ_WR; ret = set_rq_size(hr_dev, &init_attr->cap, hr_qp, - hns_roce_qp_has_rq(init_attr)); + hns_roce_qp_has_rq(init_attr), !!udata); if (ret) { ibdev_err(ibdev, "failed to set user RQ size, ret = %d.\n", ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_srq.c b/drivers/infiniband/hw/hns/hns_roce_srq.c index c4ae57e4173a..51de9305bb4d 100644 --- a/drivers/infiniband/hw/hns/hns_roce_srq.c +++ b/drivers/infiniband/hw/hns/hns_roce_srq.c @@ -3,6 +3,7 @@ * Copyright (c) 2018 Hisilicon Limited. */ +#include #include #include "hns_roce_device.h" #include "hns_roce_cmd.h" @@ -246,6 +247,9 @@ static int alloc_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq, } } + idx_que->head = 0; + idx_que->tail = 0; + return 0; err_idx_mtr: hns_roce_mtr_destroy(hr_dev, &idx_que->mtr); @@ -264,8 +268,6 @@ static void free_srq_idx(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) static int alloc_srq_wrid(struct hns_roce_dev *hr_dev, struct hns_roce_srq *srq) { - srq->head = 0; - srq->tail = srq->wqe_cnt - 1; srq->wrid = kvmalloc_array(srq->wqe_cnt, sizeof(u64), GFP_KERNEL); if (!srq->wrid) return -ENOMEM; @@ -279,6 +281,28 @@ static void free_srq_wrid(struct hns_roce_srq *srq) srq->wrid = NULL; } +static u32 proc_srq_sge(struct hns_roce_dev *dev, struct hns_roce_srq *hr_srq, + bool user) +{ + u32 max_sge = dev->caps.max_srq_sges; + + if (dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + return max_sge; + + /* Reserve SGEs only for HIP08 in kernel; The userspace driver will + * calculate number of max_sge with reserved SGEs when allocating wqe + * buf, so there is no need to do this again in kernel. But the number + * may exceed the capacity of SGEs recorded in the firmware, so the + * kernel driver should just adapt the value accordingly. + */ + if (user) + max_sge = roundup_pow_of_two(max_sge + 1); + else + hr_srq->rsv_sge = 1; + + return max_sge; +} + int hns_roce_create_srq(struct ib_srq *ib_srq, struct ib_srq_init_attr *init_attr, struct ib_udata *udata) @@ -288,6 +312,7 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, struct hns_roce_srq *srq = to_hr_srq(ib_srq); struct ib_device *ibdev = &hr_dev->ib_dev; struct hns_roce_ib_create_srq ucmd = {}; + u32 max_sge; int ret; u32 cqn; @@ -295,16 +320,27 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, init_attr->srq_type != IB_SRQT_XRC) return -EOPNOTSUPP; - /* Check the actual SRQ wqe and SRQ sge num */ - if (init_attr->attr.max_wr >= hr_dev->caps.max_srq_wrs || - init_attr->attr.max_sge > hr_dev->caps.max_srq_sges) + max_sge = proc_srq_sge(hr_dev, srq, !!udata); + + if (init_attr->attr.max_wr > hr_dev->caps.max_srq_wrs || + init_attr->attr.max_sge > max_sge) { + ibdev_err(&hr_dev->ib_dev, + "SRQ config error, depth = %u, sge = %d\n", + init_attr->attr.max_wr, init_attr->attr.max_sge); return -EINVAL; + } mutex_init(&srq->mutex); spin_lock_init(&srq->lock); - srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr + 1); - srq->max_gs = init_attr->attr.max_sge; + init_attr->attr.max_wr = max_t(u32, init_attr->attr.max_wr, + HNS_ROCE_MIN_SRQ_WQE_NUM); + srq->wqe_cnt = roundup_pow_of_two(init_attr->attr.max_wr); + srq->max_gs = + roundup_pow_of_two(init_attr->attr.max_sge + srq->rsv_sge); + init_attr->attr.max_wr = srq->wqe_cnt; + init_attr->attr.max_sge = srq->max_gs; + init_attr->attr.srq_limit = 0; if (udata) { ret = ib_copy_from_udata(&ucmd, udata, @@ -351,6 +387,8 @@ int hns_roce_create_srq(struct ib_srq *ib_srq, srq->event = hns_roce_ib_srq_event; resp.srqn = srq->srqn; + srq->max_gs = init_attr->attr.max_sge; + init_attr->attr.max_sge = srq->max_gs - srq->rsv_sge; if (udata) { ret = ib_copy_to_udata(udata, &resp, diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index 819c142857d6..8161035eb774 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -1064,7 +1064,9 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din, MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_RQT); break; case MLX5_CMD_OP_CREATE_TIR: - MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + *obj_id = MLX5_GET(create_tir_out, out, tirn); + MLX5_SET(destroy_tir_in, din, opcode, MLX5_CMD_OP_DESTROY_TIR); + MLX5_SET(destroy_tir_in, din, tirn, *obj_id); break; case MLX5_CMD_OP_CREATE_TIS: MLX5_SET(general_obj_in_cmd_hdr, din, opcode, MLX5_CMD_OP_DESTROY_TIS); @@ -1968,8 +1970,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)( num_alloc_xa_entries++; event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL); - if (!event_sub) + if (!event_sub) { + err = -ENOMEM; goto err; + } list_add_tail(&event_sub->event_list, &sub_list); uverbs_uobject_get(&ev_file->uobj); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index aabdc07e4753..3562e69eacb1 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3927,7 +3927,7 @@ static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) mlx5_ib_cleanup_multiport_master(dev); WARN_ON(!xa_empty(&dev->odp_mkeys)); cleanup_srcu_struct(&dev->odp_srcu); - + mutex_destroy(&dev->cap_mask_mutex); WARN_ON(!xa_empty(&dev->sig_mrs)); WARN_ON(!bitmap_empty(dev->dm.memic_alloc_pages, MLX5_MAX_MEMIC_PAGES)); } @@ -3978,6 +3978,10 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) dev->ib_dev.dev.parent = mdev->device; dev->ib_dev.lag_flags = RDMA_LAG_FLAGS_HASH_ALL_SLAVES; + err = init_srcu_struct(&dev->odp_srcu); + if (err) + goto err_mp; + mutex_init(&dev->cap_mask_mutex); INIT_LIST_HEAD(&dev->qp_list); spin_lock_init(&dev->reset_flow_resource_lock); @@ -3987,17 +3991,11 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) spin_lock_init(&dev->dm.lock); dev->dm.dev = mdev; - - err = init_srcu_struct(&dev->odp_srcu); - if (err) - goto err_mp; - return 0; err_mp: mlx5_ib_cleanup_multiport_master(dev); - - return -ENOMEM; + return err; } static int mlx5_ib_enable_driver(struct ib_device *dev) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 0cb7cc642d87..bab40ad527da 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2432,9 +2432,6 @@ static int check_qp_type(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, case MLX5_IB_QPT_HW_GSI: case IB_QPT_DRIVER: case IB_QPT_GSI: - if (dev->profile == &raw_eth_profile) - goto out; - fallthrough; case IB_QPT_RAW_PACKET: case IB_QPT_UD: case MLX5_IB_QPT_REG_UMR: @@ -2629,10 +2626,6 @@ static int process_create_flags(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, int create_flags = attr->create_flags; bool cond; - if (qp->type == IB_QPT_UD && dev->profile == &raw_eth_profile) - if (create_flags & ~MLX5_IB_QP_CREATE_WC_TEST) - return -EINVAL; - if (qp_type == MLX5_IB_QPT_DCT) return (create_flags) ? -EINVAL : 0; @@ -4211,6 +4204,23 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, return 0; } +static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + struct mlx5_ib_qp *qp, + enum ib_qp_type qp_type) +{ + if (dev->profile != &raw_eth_profile) + return true; + + if (qp_type == IB_QPT_RAW_PACKET || qp_type == MLX5_IB_QPT_REG_UMR) + return true; + + /* Internal QP used for wc testing, with NOPs in wq */ + if (qp->flags & MLX5_IB_QP_CREATE_WC_TEST) + return true; + + return false; +} + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask, struct ib_udata *udata) { @@ -4223,6 +4233,9 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int err = -EINVAL; int port; + if (!mlx5_ib_modify_qp_allowed(dev, qp, ibqp->qp_type)) + return -EOPNOTSUPP; + if (attr_mask & ~(IB_QP_ATTR_STANDARD_BITS | IB_QP_RATE_LIMIT)) return -EOPNOTSUPP; diff --git a/drivers/infiniband/sw/rxe/Kconfig b/drivers/infiniband/sw/rxe/Kconfig index 452149066792..06b8dc5093f7 100644 --- a/drivers/infiniband/sw/rxe/Kconfig +++ b/drivers/infiniband/sw/rxe/Kconfig @@ -4,6 +4,7 @@ config RDMA_RXE depends on INET && PCI && INFINIBAND depends on INFINIBAND_VIRT_DMA select NET_UDP_TUNNEL + select CRYPTO select CRYPTO_CRC32 help This driver implements the InfiniBand RDMA transport over diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 943914c2a50c..bce44502ab0e 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -414,6 +414,11 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb) void rxe_loopback(struct sk_buff *skb) { + if (skb->protocol == htons(ETH_P_IP)) + skb_pull(skb, sizeof(struct iphdr)); + else + skb_pull(skb, sizeof(struct ipv6hdr)); + rxe_rcv(skb); } diff --git a/drivers/infiniband/sw/rxe/rxe_recv.c b/drivers/infiniband/sw/rxe/rxe_recv.c index c9984a28eecc..cb69a125e280 100644 --- a/drivers/infiniband/sw/rxe/rxe_recv.c +++ b/drivers/infiniband/sw/rxe/rxe_recv.c @@ -9,21 +9,26 @@ #include "rxe.h" #include "rxe_loc.h" +/* check that QP matches packet opcode type and is in a valid state */ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, struct rxe_qp *qp) { + unsigned int pkt_type; + if (unlikely(!qp->valid)) goto err1; + pkt_type = pkt->opcode & 0xe0; + switch (qp_type(qp)) { case IB_QPT_RC: - if (unlikely((pkt->opcode & IB_OPCODE_RC) != 0)) { + if (unlikely(pkt_type != IB_OPCODE_RC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } break; case IB_QPT_UC: - if (unlikely(!(pkt->opcode & IB_OPCODE_UC))) { + if (unlikely(pkt_type != IB_OPCODE_UC)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -31,7 +36,7 @@ static int check_type_state(struct rxe_dev *rxe, struct rxe_pkt_info *pkt, case IB_QPT_UD: case IB_QPT_SMI: case IB_QPT_GSI: - if (unlikely(!(pkt->opcode & IB_OPCODE_UD))) { + if (unlikely(pkt_type != IB_OPCODE_UD)) { pr_warn_ratelimited("bad qp type\n"); goto err1; } @@ -252,7 +257,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) list_for_each_entry(mce, &mcg->qp_list, qp_list) { qp = mce->qp; - pkt = SKB_TO_PKT(skb); /* validate qp for incoming packet */ err = check_type_state(rxe, pkt, qp); @@ -264,12 +268,18 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) continue; /* for all but the last qp create a new clone of the - * skb and pass to the qp. + * skb and pass to the qp. If an error occurs in the + * checks for the last qp in the list we need to + * free the skb since it hasn't been passed on to + * rxe_rcv_pkt() which would free it later. */ - if (mce->qp_list.next != &mcg->qp_list) + if (mce->qp_list.next != &mcg->qp_list) { per_qp_skb = skb_clone(skb, GFP_ATOMIC); - else + } else { per_qp_skb = skb; + /* show we have consumed the skb */ + skb = NULL; + } if (unlikely(!per_qp_skb)) continue; @@ -284,9 +294,8 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb) rxe_drop_ref(mcg); /* drop ref from rxe_pool_get_key. */ - return; - err1: + /* free skb if not consumed */ kfree_skb(skb); } diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h index adda78996219..368959ae9a8c 100644 --- a/drivers/infiniband/sw/siw/siw.h +++ b/drivers/infiniband/sw/siw/siw.h @@ -653,7 +653,7 @@ static inline struct siw_sqe *orq_get_free(struct siw_qp *qp) { struct siw_sqe *orq_e = orq_get_tail(qp); - if (orq_e && READ_ONCE(orq_e->flags) == 0) + if (READ_ONCE(orq_e->flags) == 0) return orq_e; return NULL; diff --git a/drivers/infiniband/sw/siw/siw_main.c b/drivers/infiniband/sw/siw/siw_main.c index ee95cf29179d..41c46dfaebf6 100644 --- a/drivers/infiniband/sw/siw/siw_main.c +++ b/drivers/infiniband/sw/siw/siw_main.c @@ -135,7 +135,7 @@ static struct { static int siw_init_cpulist(void) { - int i, num_nodes = num_possible_nodes(); + int i, num_nodes = nr_node_ids; memset(siw_tx_thread, 0, sizeof(siw_tx_thread)); diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c index 875d36d4b1c6..ddb2e66f9f13 100644 --- a/drivers/infiniband/sw/siw/siw_qp.c +++ b/drivers/infiniband/sw/siw/siw_qp.c @@ -199,26 +199,26 @@ void siw_qp_llp_write_space(struct sock *sk) static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size) { - irq_size = roundup_pow_of_two(irq_size); - orq_size = roundup_pow_of_two(orq_size); - - qp->attrs.irq_size = irq_size; - qp->attrs.orq_size = orq_size; - - qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); - if (!qp->irq) { - siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size); - qp->attrs.irq_size = 0; - return -ENOMEM; + if (irq_size) { + irq_size = roundup_pow_of_two(irq_size); + qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe)); + if (!qp->irq) { + qp->attrs.irq_size = 0; + return -ENOMEM; + } } - qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); - if (!qp->orq) { - siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size); - qp->attrs.orq_size = 0; - qp->attrs.irq_size = 0; - vfree(qp->irq); - return -ENOMEM; + if (orq_size) { + orq_size = roundup_pow_of_two(orq_size); + qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe)); + if (!qp->orq) { + qp->attrs.orq_size = 0; + qp->attrs.irq_size = 0; + vfree(qp->irq); + return -ENOMEM; + } } + qp->attrs.irq_size = irq_size; + qp->attrs.orq_size = orq_size; siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size); return 0; } @@ -288,13 +288,14 @@ int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl) if (ctrl & MPA_V2_RDMA_WRITE_RTR) wqe->sqe.opcode = SIW_OP_WRITE; else if (ctrl & MPA_V2_RDMA_READ_RTR) { - struct siw_sqe *rreq; + struct siw_sqe *rreq = NULL; wqe->sqe.opcode = SIW_OP_READ; spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); + if (qp->attrs.orq_size) + rreq = orq_get_free(qp); if (rreq) { siw_read_to_orq(rreq, &wqe->sqe); qp->orq_put++; @@ -877,135 +878,88 @@ void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe) rreq->num_sge = 1; } -/* - * Must be called with SQ locked. - * To avoid complete SQ starvation by constant inbound READ requests, - * the active IRQ will not be served after qp->irq_burst, if the - * SQ has pending work. - */ -int siw_activate_tx(struct siw_qp *qp) +static int siw_activate_tx_from_sq(struct siw_qp *qp) { - struct siw_sqe *irqe, *sqe; + struct siw_sqe *sqe; struct siw_wqe *wqe = tx_wqe(qp); int rv = 1; - irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; - - if (irqe->flags & SIW_WQE_VALID) { - sqe = sq_get_next(qp); - - /* - * Avoid local WQE processing starvation in case - * of constant inbound READ request stream - */ - if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { - qp->irq_burst = 0; - goto skip_irq; - } - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* start READ RESPONSE */ - wqe->sqe.opcode = SIW_OP_READ_RESPONSE; - wqe->sqe.flags = 0; - if (irqe->num_sge) { - wqe->sqe.num_sge = 1; - wqe->sqe.sge[0].length = irqe->sge[0].length; - wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; - wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; - } else { - wqe->sqe.num_sge = 0; - } - - /* Retain original RREQ's message sequence number for - * potential error reporting cases. - */ - wqe->sqe.sge[1].length = irqe->sge[1].length; - - wqe->sqe.rkey = irqe->rkey; - wqe->sqe.raddr = irqe->raddr; + sqe = sq_get_next(qp); + if (!sqe) + return 0; - wqe->processed = 0; - qp->irq_get++; + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; - /* mark current IRQ entry free */ - smp_store_mb(irqe->flags, 0); + /* First copy SQE to kernel private memory */ + memcpy(&wqe->sqe, sqe, sizeof(*sqe)); + if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + rv = -EINVAL; goto out; } - sqe = sq_get_next(qp); - if (sqe) { -skip_irq: - memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); - wqe->wr_status = SIW_WR_QUEUED; - - /* First copy SQE to kernel private memory */ - memcpy(&wqe->sqe, sqe, sizeof(*sqe)); - - if (wqe->sqe.opcode >= SIW_NUM_OPCODES) { + if (wqe->sqe.flags & SIW_WQE_INLINE) { + if (wqe->sqe.opcode != SIW_OP_SEND && + wqe->sqe.opcode != SIW_OP_WRITE) { rv = -EINVAL; goto out; } - if (wqe->sqe.flags & SIW_WQE_INLINE) { - if (wqe->sqe.opcode != SIW_OP_SEND && - wqe->sqe.opcode != SIW_OP_WRITE) { - rv = -EINVAL; - goto out; - } - if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { - rv = -EINVAL; - goto out; - } - wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; - wqe->sqe.sge[0].lkey = 0; - wqe->sqe.num_sge = 1; + if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) { + rv = -EINVAL; + goto out; } - if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { - /* A READ cannot be fenced */ - if (unlikely(wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == - SIW_OP_READ_LOCAL_INV)) { - siw_dbg_qp(qp, "cannot fence read\n"); - rv = -EINVAL; - goto out; - } - spin_lock(&qp->orq_lock); + wqe->sqe.sge[0].laddr = (uintptr_t)&wqe->sqe.sge[1]; + wqe->sqe.sge[0].lkey = 0; + wqe->sqe.num_sge = 1; + } + if (wqe->sqe.flags & SIW_WQE_READ_FENCE) { + /* A READ cannot be fenced */ + if (unlikely(wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == + SIW_OP_READ_LOCAL_INV)) { + siw_dbg_qp(qp, "cannot fence read\n"); + rv = -EINVAL; + goto out; + } + spin_lock(&qp->orq_lock); - if (!siw_orq_empty(qp)) { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + if (qp->attrs.orq_size && !siw_orq_empty(qp)) { + qp->tx_ctx.orq_fence = 1; + rv = 0; + } + spin_unlock(&qp->orq_lock); - } else if (wqe->sqe.opcode == SIW_OP_READ || - wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { - struct siw_sqe *rreq; + } else if (wqe->sqe.opcode == SIW_OP_READ || + wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) { + struct siw_sqe *rreq; - wqe->sqe.num_sge = 1; + if (unlikely(!qp->attrs.orq_size)) { + /* We negotiated not to send READ req's */ + rv = -EINVAL; + goto out; + } + wqe->sqe.num_sge = 1; - spin_lock(&qp->orq_lock); + spin_lock(&qp->orq_lock); - rreq = orq_get_free(qp); - if (rreq) { - /* - * Make an immediate copy in ORQ to be ready - * to process loopback READ reply - */ - siw_read_to_orq(rreq, &wqe->sqe); - qp->orq_put++; - } else { - qp->tx_ctx.orq_fence = 1; - rv = 0; - } - spin_unlock(&qp->orq_lock); + rreq = orq_get_free(qp); + if (rreq) { + /* + * Make an immediate copy in ORQ to be ready + * to process loopback READ reply + */ + siw_read_to_orq(rreq, &wqe->sqe); + qp->orq_put++; + } else { + qp->tx_ctx.orq_fence = 1; + rv = 0; } - - /* Clear SQE, can be re-used by application */ - smp_store_mb(sqe->flags, 0); - qp->sq_get++; - } else { - rv = 0; + spin_unlock(&qp->orq_lock); } + + /* Clear SQE, can be re-used by application */ + smp_store_mb(sqe->flags, 0); + qp->sq_get++; out: if (unlikely(rv < 0)) { siw_dbg_qp(qp, "error %d\n", rv); @@ -1014,6 +968,65 @@ int siw_activate_tx(struct siw_qp *qp) return rv; } +/* + * Must be called with SQ locked. + * To avoid complete SQ starvation by constant inbound READ requests, + * the active IRQ will not be served after qp->irq_burst, if the + * SQ has pending work. + */ +int siw_activate_tx(struct siw_qp *qp) +{ + struct siw_sqe *irqe; + struct siw_wqe *wqe = tx_wqe(qp); + + if (!qp->attrs.irq_size) + return siw_activate_tx_from_sq(qp); + + irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size]; + + if (!(irqe->flags & SIW_WQE_VALID)) + return siw_activate_tx_from_sq(qp); + + /* + * Avoid local WQE processing starvation in case + * of constant inbound READ request stream + */ + if (sq_get_next(qp) && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) { + qp->irq_burst = 0; + return siw_activate_tx_from_sq(qp); + } + memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE); + wqe->wr_status = SIW_WR_QUEUED; + + /* start READ RESPONSE */ + wqe->sqe.opcode = SIW_OP_READ_RESPONSE; + wqe->sqe.flags = 0; + if (irqe->num_sge) { + wqe->sqe.num_sge = 1; + wqe->sqe.sge[0].length = irqe->sge[0].length; + wqe->sqe.sge[0].laddr = irqe->sge[0].laddr; + wqe->sqe.sge[0].lkey = irqe->sge[0].lkey; + } else { + wqe->sqe.num_sge = 0; + } + + /* Retain original RREQ's message sequence number for + * potential error reporting cases. + */ + wqe->sqe.sge[1].length = irqe->sge[1].length; + + wqe->sqe.rkey = irqe->rkey; + wqe->sqe.raddr = irqe->raddr; + + wqe->processed = 0; + qp->irq_get++; + + /* mark current IRQ entry free */ + smp_store_mb(irqe->flags, 0); + + return 1; +} + /* * Check if current CQ state qualifies for calling CQ completion * handler. Must be called with CQ lock held. diff --git a/drivers/infiniband/sw/siw/siw_qp_rx.c b/drivers/infiniband/sw/siw/siw_qp_rx.c index 4bd1f1f84057..60116f20653c 100644 --- a/drivers/infiniband/sw/siw/siw_qp_rx.c +++ b/drivers/infiniband/sw/siw/siw_qp_rx.c @@ -680,6 +680,10 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) } spin_lock_irqsave(&qp->sq_lock, flags); + if (unlikely(!qp->attrs.irq_size)) { + run_sq = 0; + goto error_irq; + } if (tx_work->wr_status == SIW_WR_IDLE) { /* * immediately schedule READ response w/o @@ -712,8 +716,9 @@ static int siw_init_rresp(struct siw_qp *qp, struct siw_rx_stream *srx) /* RRESP now valid as current TX wqe or placed into IRQ */ smp_store_mb(resp->flags, SIW_WQE_VALID); } else { - pr_warn("siw: [QP %u]: irq %d exceeded %d\n", qp_id(qp), - qp->irq_put % qp->attrs.irq_size, qp->attrs.irq_size); +error_irq: + pr_warn("siw: [QP %u]: IRQ exceeded or null, size %d\n", + qp_id(qp), qp->attrs.irq_size); siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, RDMAP_ETYPE_REMOTE_OPERATION, @@ -740,6 +745,9 @@ static int siw_orqe_start_rx(struct siw_qp *qp) struct siw_sqe *orqe; struct siw_wqe *wqe = NULL; + if (unlikely(!qp->attrs.orq_size)) + return -EPROTO; + /* make sure ORQ indices are current */ smp_mb(); @@ -796,8 +804,8 @@ int siw_proc_rresp(struct siw_qp *qp) */ rv = siw_orqe_start_rx(qp); if (rv) { - pr_warn("siw: [QP %u]: ORQ empty at idx %d\n", - qp_id(qp), qp->orq_get % qp->attrs.orq_size); + pr_warn("siw: [QP %u]: ORQ empty, size %d\n", + qp_id(qp), qp->attrs.orq_size); goto error_term; } rv = siw_rresp_check_ntoh(srx, frx); @@ -1290,11 +1298,13 @@ static int siw_rdmap_complete(struct siw_qp *qp, int error) wc_status); siw_wqe_put_mem(wqe, SIW_OP_READ); - if (!error) + if (!error) { rv = siw_check_tx_fence(qp); - else - /* Disable current ORQ eleement */ - WRITE_ONCE(orq_get_current(qp)->flags, 0); + } else { + /* Disable current ORQ element */ + if (qp->attrs.orq_size) + WRITE_ONCE(orq_get_current(qp)->flags, 0); + } break; case RDMAP_RDMA_READ_REQ: diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c index d19d8325588b..7989c4043db4 100644 --- a/drivers/infiniband/sw/siw/siw_qp_tx.c +++ b/drivers/infiniband/sw/siw/siw_qp_tx.c @@ -1107,8 +1107,8 @@ int siw_qp_sq_process(struct siw_qp *qp) /* * RREQ may have already been completed by inbound RRESP! */ - if (tx_type == SIW_OP_READ || - tx_type == SIW_OP_READ_LOCAL_INV) { + if ((tx_type == SIW_OP_READ || + tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { /* Cleanup pending entry in ORQ */ qp->orq_put--; qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 68fd053fc774..e389d44e5591 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -365,13 +365,23 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, if (rv) goto err_out; + num_sqe = attrs->cap.max_send_wr; + num_rqe = attrs->cap.max_recv_wr; + /* All queue indices are derived from modulo operations * on a free running 'get' (consumer) and 'put' (producer) * unsigned counter. Having queue sizes at power of two * avoids handling counter wrap around. */ - num_sqe = roundup_pow_of_two(attrs->cap.max_send_wr); - num_rqe = roundup_pow_of_two(attrs->cap.max_recv_wr); + if (num_sqe) + num_sqe = roundup_pow_of_two(num_sqe); + else { + /* Zero sized SQ is not supported */ + rv = -EINVAL; + goto err_out; + } + if (num_rqe) + num_rqe = roundup_pow_of_two(num_rqe); if (udata) qp->sendq = vmalloc_user(num_sqe * sizeof(struct siw_sqe)); @@ -379,7 +389,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->sendq = vzalloc(num_sqe * sizeof(struct siw_sqe)); if (qp->sendq == NULL) { - siw_dbg(base_dev, "SQ size %d alloc failed\n", num_sqe); rv = -ENOMEM; goto err_out_xa; } @@ -413,7 +422,6 @@ struct ib_qp *siw_create_qp(struct ib_pd *pd, qp->recvq = vzalloc(num_rqe * sizeof(struct siw_rqe)); if (qp->recvq == NULL) { - siw_dbg(base_dev, "RQ size %d alloc failed\n", num_rqe); rv = -ENOMEM; goto err_out_xa; } @@ -966,9 +974,9 @@ int siw_post_receive(struct ib_qp *base_qp, const struct ib_recv_wr *wr, unsigned long flags; int rv = 0; - if (qp->srq) { + if (qp->srq || qp->attrs.rq_size == 0) { *bad_wr = wr; - return -EOPNOTSUPP; /* what else from errno.h? */ + return -EINVAL; } if (!rdma_is_kernel_res(&qp->base_qp.res)) { siw_dbg_qp(qp, "no kernel post_recv for user mapped rq\n"); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c index ba00f0de14ca..ad77659800cd 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt-sysfs.c @@ -408,6 +408,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) "%s", str); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->kobj); return err; } err = sysfs_create_group(&sess->kobj, &rtrs_clt_sess_attr_group); @@ -419,6 +420,7 @@ int rtrs_clt_create_sess_files(struct rtrs_clt_sess *sess) &sess->kobj, "stats"); if (err) { pr_err("kobject_init_and_add: %d\n", err); + kobject_put(&sess->stats->kobj_stats); goto remove_group; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 67f86c405a26..394c1f6822b9 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -31,6 +31,8 @@ */ #define RTRS_RECONNECT_SEED 8 +#define FIRST_CONN 0x01 + MODULE_DESCRIPTION("RDMA Transport Client"); MODULE_LICENSE("GPL"); @@ -494,7 +496,7 @@ static void rtrs_clt_recv_done(struct rtrs_clt_con *con, struct ib_wc *wc) int err; struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); err = rtrs_iu_post_recv(&con->c, iu); @@ -514,7 +516,7 @@ static void rtrs_clt_rkey_rsp_done(struct rtrs_clt_con *con, struct ib_wc *wc) u32 buf_id; int err; - WARN_ON(sess->flags != RTRS_MSG_NEW_RKEY_F); + WARN_ON((sess->flags & RTRS_MSG_NEW_RKEY_F) == 0); iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); @@ -621,12 +623,12 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) } else if (imm_type == RTRS_HB_MSG_IMM) { WARN_ON(con->c.cid); rtrs_send_hb_ack(&sess->s); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else if (imm_type == RTRS_HB_ACK_IMM) { WARN_ON(con->c.cid); sess->s.hb_missed_cnt = 0; - if (sess->flags == RTRS_MSG_NEW_RKEY_F) + if (sess->flags & RTRS_MSG_NEW_RKEY_F) return rtrs_clt_recv_done(con, wc); } else { rtrs_wrn(con->c.sess, "Unknown IMM type %u\n", @@ -654,7 +656,7 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) WARN_ON(!(wc->wc_flags & IB_WC_WITH_INVALIDATE || wc->wc_flags & IB_WC_WITH_IMM)); WARN_ON(wc->wr_cqe->done != rtrs_clt_rdma_done); - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { if (wc->wc_flags & IB_WC_WITH_INVALIDATE) return rtrs_clt_recv_done(con, wc); @@ -664,7 +666,6 @@ static void rtrs_clt_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_RDMA_WRITE: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ break; @@ -680,7 +681,7 @@ static int post_recv_io(struct rtrs_clt_con *con, size_t q_size) struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); for (i = 0; i < q_size; i++) { - if (sess->flags == RTRS_MSG_NEW_RKEY_F) { + if (sess->flags & RTRS_MSG_NEW_RKEY_F) { struct rtrs_iu *iu = &con->rsp_ius[i]; err = rtrs_iu_post_recv(&con->c, iu); @@ -1511,7 +1512,7 @@ static void destroy_con(struct rtrs_clt_con *con) static int create_con_cq_qp(struct rtrs_clt_con *con) { struct rtrs_clt_sess *sess = to_clt_sess(con->c.sess); - u16 wr_queue_size; + u32 max_send_wr, max_recv_wr, cq_size; int err, cq_vector; struct rtrs_msg_rkey_rsp *rsp; @@ -1523,7 +1524,8 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) * + 2 for drain and heartbeat * in case qp gets into error state */ - wr_queue_size = SERVICE_CON_QUEUE_DEPTH * 3 + 2; + max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; + max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; /* We must be the first here */ if (WARN_ON(sess->s.dev)) return -EINVAL; @@ -1555,25 +1557,29 @@ static int create_con_cq_qp(struct rtrs_clt_con *con) /* Shared between connections */ sess->s.dev_ref++; - wr_queue_size = + max_send_wr = min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, /* QD * (REQ + RSP + FR REGS or INVS) + drain */ sess->queue_depth * 3 + 1); + max_recv_wr = + min_t(int, sess->s.dev->ib_dev->attrs.max_qp_wr, + sess->queue_depth * 3 + 1); } /* alloc iu to recv new rkey reply when server reports flags set */ - if (sess->flags == RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { - con->rsp_ius = rtrs_iu_alloc(wr_queue_size, sizeof(*rsp), + if (sess->flags & RTRS_MSG_NEW_RKEY_F || con->c.cid == 0) { + con->rsp_ius = rtrs_iu_alloc(max_recv_wr, sizeof(*rsp), GFP_KERNEL, sess->s.dev->ib_dev, DMA_FROM_DEVICE, rtrs_clt_rdma_done); if (!con->rsp_ius) return -ENOMEM; - con->queue_size = wr_queue_size; + con->queue_size = max_recv_wr; } + cq_size = max_send_wr + max_recv_wr; cq_vector = con->cpu % sess->s.dev->ib_dev->num_comp_vectors; err = rtrs_cq_qp_create(&sess->s, &con->c, sess->max_send_sge, - cq_vector, wr_queue_size, wr_queue_size, - IB_POLL_SOFTIRQ); + cq_vector, cq_size, max_send_wr, + max_recv_wr, IB_POLL_SOFTIRQ); /* * In case of error we do not bother to clean previous allocations, * since destroy_con_cq_qp() must be called. @@ -1657,6 +1663,7 @@ static int rtrs_rdma_route_resolved(struct rtrs_clt_con *con) .cid_num = cpu_to_le16(sess->s.con_num), .recon_cnt = cpu_to_le16(sess->s.recon_cnt), }; + msg.first_conn = sess->for_new_clt ? FIRST_CONN : 0; uuid_copy(&msg.sess_uuid, &sess->s.uuid); uuid_copy(&msg.paths_uuid, &clt->paths_uuid); @@ -1742,6 +1749,8 @@ static int rtrs_rdma_conn_established(struct rtrs_clt_con *con, scnprintf(sess->hca_name, sizeof(sess->hca_name), sess->s.dev->ib_dev->name); sess->s.src_addr = con->c.cm_id->route.addr.src_addr; + /* set for_new_clt, to allow future reconnect on any path */ + sess->for_new_clt = 1; } return 0; @@ -2565,11 +2574,8 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, clt->dev.class = rtrs_clt_dev_class; clt->dev.release = rtrs_clt_dev_release; err = dev_set_name(&clt->dev, "%s", sessname); - if (err) { - free_percpu(clt->pcpu_path); - kfree(clt); - return ERR_PTR(err); - } + if (err) + goto err; /* * Suppress user space notification until * sysfs files are created @@ -2577,29 +2583,31 @@ static struct rtrs_clt *alloc_clt(const char *sessname, size_t paths_num, dev_set_uevent_suppress(&clt->dev, true); err = device_register(&clt->dev); if (err) { - free_percpu(clt->pcpu_path); put_device(&clt->dev); - return ERR_PTR(err); + goto err; } clt->kobj_paths = kobject_create_and_add("paths", &clt->dev.kobj); if (!clt->kobj_paths) { - free_percpu(clt->pcpu_path); - device_unregister(&clt->dev); - return NULL; + err = -ENOMEM; + goto err_dev; } err = rtrs_clt_create_sysfs_root_files(clt); if (err) { - free_percpu(clt->pcpu_path); kobject_del(clt->kobj_paths); kobject_put(clt->kobj_paths); - device_unregister(&clt->dev); - return ERR_PTR(err); + goto err_dev; } dev_set_uevent_suppress(&clt->dev, false); kobject_uevent(&clt->dev.kobj, KOBJ_ADD); return clt; +err_dev: + device_unregister(&clt->dev); +err: + free_percpu(clt->pcpu_path); + kfree(clt); + return ERR_PTR(err); } static void wait_for_inflight_permits(struct rtrs_clt *clt) @@ -2672,6 +2680,8 @@ struct rtrs_clt *rtrs_clt_open(struct rtrs_clt_ops *ops, err = PTR_ERR(sess); goto close_all_sess; } + if (!i) + sess->for_new_clt = 1; list_add_tail_rcu(&sess->s.entry, &clt->paths_list); err = init_sess(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.h b/drivers/infiniband/ulp/rtrs/rtrs-clt.h index b8dbd701b3cb..7c9e15502796 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.h @@ -143,6 +143,7 @@ struct rtrs_clt_sess { int max_send_sge; u32 flags; struct kobject kobj; + u8 for_new_clt; struct rtrs_clt_stats *stats; /* cache hca_port and hca_name to display in sysfs */ u8 hca_port; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index 3f2918671dbe..8caad0a2322b 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -188,7 +188,9 @@ struct rtrs_msg_conn_req { __le16 recon_cnt; uuid_t sess_uuid; uuid_t paths_uuid; - u8 reserved[12]; + u8 first_conn : 1; + u8 reserved_bits : 7; + u8 reserved[11]; }; /** @@ -303,8 +305,9 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, struct ib_send_wr *head); int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx); + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx); void rtrs_cq_qp_destroy(struct rtrs_con *con); void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index d2edff3b8f0d..126a96e75c62 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -51,6 +51,8 @@ static ssize_t rtrs_srv_disconnect_store(struct kobject *kobj, sockaddr_to_str((struct sockaddr *)&sess->s.dst_addr, str, sizeof(str)); rtrs_info(s, "disconnect for path %s requested\n", str); + /* first remove sysfs itself to avoid deadlock */ + sysfs_remove_file_self(&sess->kobj, &attr->attr); close_sess(sess); return count; @@ -181,6 +183,7 @@ static int rtrs_srv_create_once_sysfs_root_folders(struct rtrs_srv_sess *sess) err = -ENOMEM; pr_err("kobject_create_and_add(): %d\n", err); device_del(&srv->dev); + put_device(&srv->dev); goto unlock; } dev_set_uevent_suppress(&srv->dev, false); @@ -206,6 +209,7 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_sess *sess) kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); + put_device(&srv->dev); } else { mutex_unlock(&srv->paths_mutex); } @@ -234,6 +238,7 @@ static int rtrs_srv_create_stats_files(struct rtrs_srv_sess *sess) &sess->kobj, "stats"); if (err) { rtrs_err(s, "kobject_init_and_add(): %d\n", err); + kobject_put(&sess->stats->kobj_stats); return err; } err = sysfs_create_group(&sess->stats->kobj_stats, @@ -290,8 +295,8 @@ int rtrs_srv_create_sess_files(struct rtrs_srv_sess *sess) sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); put_kobj: kobject_del(&sess->kobj); - kobject_put(&sess->kobj); destroy_root: + kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); return err; @@ -302,7 +307,7 @@ void rtrs_srv_destroy_sess_files(struct rtrs_srv_sess *sess) if (sess->kobj.state_in_sysfs) { kobject_del(&sess->stats->kobj_stats); kobject_put(&sess->stats->kobj_stats); - kobject_del(&sess->kobj); + sysfs_remove_group(&sess->kobj, &rtrs_srv_sess_attr_group); kobject_put(&sess->kobj); rtrs_srv_destroy_once_sysfs_root_folders(sess); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index c42fd470c4eb..d071809e3ed2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -222,7 +222,8 @@ static int rdma_write_sg(struct rtrs_srv_op *id) dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; struct rtrs_srv_mr *srv_mr; struct rtrs_srv *srv = sess->srv; - struct ib_send_wr inv_wr, imm_wr; + struct ib_send_wr inv_wr; + struct ib_rdma_wr imm_wr; struct ib_rdma_wr *wr = NULL; enum ib_send_flags flags; size_t sg_cnt; @@ -267,21 +268,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) WARN_ON_ONCE(rkey != wr->rkey); wr->wr.opcode = IB_WR_RDMA_WRITE; + wr->wr.wr_cqe = &io_comp_cqe; wr->wr.ex.imm_data = 0; wr->wr.send_flags = 0; if (need_inval && always_invalidate) { wr->wr.next = &rwr.wr; rwr.wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else if (always_invalidate) { wr->wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr->wr.next = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr->wr.next = &imm_wr; + wr->wr.next = &imm_wr.wr; } /* * From time to time we have to post signaled sends, @@ -294,16 +296,18 @@ static int rdma_write_sg(struct rtrs_srv_op *id) inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.send_flags = 0; inv_wr.ex.invalidate_rkey = rkey; } - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.mr = srv_mr->mr; rwr.wr.send_flags = 0; @@ -318,22 +322,22 @@ static int rdma_write_sg(struct rtrs_srv_op *id) list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, + imm_wr.wr.send_flags = flags; + imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 0, need_inval)); - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.wr_cqe = &io_comp_cqe; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, offset, DMA_BIDIRECTIONAL); @@ -360,7 +364,8 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, { struct rtrs_sess *s = con->c.sess; struct rtrs_srv_sess *sess = to_srv_sess(s); - struct ib_send_wr inv_wr, imm_wr, *wr = NULL; + struct ib_send_wr inv_wr, *wr = NULL; + struct ib_rdma_wr imm_wr; struct ib_reg_wr rwr; struct rtrs_srv *srv = sess->srv; struct rtrs_srv_mr *srv_mr; @@ -379,6 +384,7 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval) { if (likely(sg_cnt)) { + inv_wr.wr_cqe = &io_comp_cqe; inv_wr.sg_list = NULL; inv_wr.num_sge = 0; inv_wr.opcode = IB_WR_SEND_WITH_INV; @@ -396,15 +402,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, if (need_inval && always_invalidate) { wr = &inv_wr; inv_wr.next = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (always_invalidate) { wr = &rwr.wr; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; } else if (need_inval) { wr = &inv_wr; - inv_wr.next = &imm_wr; + inv_wr.next = &imm_wr.wr; } else { - wr = &imm_wr; + wr = &imm_wr.wr; } /* * From time to time we have to post signalled sends, @@ -413,14 +419,15 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? 0 : IB_SEND_SIGNALED; imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); - imm_wr.next = NULL; + imm_wr.wr.next = NULL; if (always_invalidate) { struct ib_sge list; struct rtrs_msg_rkey_rsp *msg; srv_mr = &sess->mrs[id->msg_id]; - rwr.wr.next = &imm_wr; + rwr.wr.next = &imm_wr.wr; rwr.wr.opcode = IB_WR_REG_MR; + rwr.wr.wr_cqe = &local_reg_cqe; rwr.wr.num_sge = 0; rwr.wr.send_flags = 0; rwr.mr = srv_mr->mr; @@ -435,21 +442,21 @@ static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, list.addr = srv_mr->iu->dma_addr; list.length = sizeof(*msg); list.lkey = sess->s.dev->ib_pd->local_dma_lkey; - imm_wr.sg_list = &list; - imm_wr.num_sge = 1; - imm_wr.opcode = IB_WR_SEND_WITH_IMM; + imm_wr.wr.sg_list = &list; + imm_wr.wr.num_sge = 1; + imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; ib_dma_sync_single_for_device(sess->s.dev->ib_dev, srv_mr->iu->dma_addr, srv_mr->iu->size, DMA_TO_DEVICE); } else { - imm_wr.sg_list = NULL; - imm_wr.num_sge = 0; - imm_wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; + imm_wr.wr.sg_list = NULL; + imm_wr.wr.num_sge = 0; + imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; } - imm_wr.send_flags = flags; - imm_wr.wr_cqe = &io_comp_cqe; + imm_wr.wr.send_flags = flags; + imm_wr.wr.wr_cqe = &io_comp_cqe; - imm_wr.ex.imm_data = cpu_to_be32(imm); + imm_wr.wr.ex.imm_data = cpu_to_be32(imm); err = ib_post_send(id->con->c.qp, wr, NULL); if (unlikely(err)) @@ -651,7 +658,7 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) if (!srv_mr->iu) { err = -ENOMEM; rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); - goto free_iu; + goto dereg_mr; } } /* Eventually dma addr for each chunk can be cached */ @@ -667,7 +674,6 @@ static int map_cont_bufs(struct rtrs_srv_sess *sess) srv_mr = &sess->mrs[mri]; sgt = &srv_mr->sgt; mr = srv_mr->mr; -free_iu: rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); dereg_mr: ib_dereg_mr(mr); @@ -814,7 +820,7 @@ static int process_info_req(struct rtrs_srv_con *con, rwr[mri].wr.opcode = IB_WR_REG_MR; rwr[mri].wr.wr_cqe = &local_reg_cqe; rwr[mri].wr.num_sge = 0; - rwr[mri].wr.send_flags = mri ? 0 : IB_SEND_SIGNALED; + rwr[mri].wr.send_flags = 0; rwr[mri].mr = mr; rwr[mri].key = mr->rkey; rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | @@ -1238,7 +1244,6 @@ static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) case IB_WC_SEND: /* * post_send() RDMA write completions of IO reqs (read/write) - * and hb */ atomic_add(srv->queue_depth, &con->sq_wr_avail); @@ -1328,7 +1333,8 @@ static void free_srv(struct rtrs_srv *srv) } static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, - const uuid_t *paths_uuid) + const uuid_t *paths_uuid, + bool first_conn) { struct rtrs_srv *srv; int i; @@ -1341,13 +1347,18 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, return srv; } } + mutex_unlock(&ctx->srv_mutex); + /* + * If this request is not the first connection request from the + * client for this session then fail and return error. + */ + if (!first_conn) + return ERR_PTR(-ENXIO); /* need to allocate a new srv */ srv = kzalloc(sizeof(*srv), GFP_KERNEL); - if (!srv) { - mutex_unlock(&ctx->srv_mutex); - return NULL; - } + if (!srv) + return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&srv->paths_list); mutex_init(&srv->paths_mutex); @@ -1357,8 +1368,6 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, srv->ctx = ctx; device_initialize(&srv->dev); srv->dev.release = rtrs_srv_dev_release; - list_add(&srv->ctx_list, &ctx->srv_list); - mutex_unlock(&ctx->srv_mutex); srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), GFP_KERNEL); @@ -1371,6 +1380,9 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, goto err_free_chunks; } refcount_set(&srv->refcount, 1); + mutex_lock(&ctx->srv_mutex); + list_add(&srv->ctx_list, &ctx->srv_list); + mutex_unlock(&ctx->srv_mutex); return srv; @@ -1381,7 +1393,7 @@ static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, err_free_srv: kfree(srv); - return NULL; + return ERR_PTR(-ENOMEM); } static void put_srv(struct rtrs_srv *srv) @@ -1461,10 +1473,12 @@ static bool __is_path_w_addr_exists(struct rtrs_srv *srv, static void free_sess(struct rtrs_srv_sess *sess) { - if (sess->kobj.state_in_sysfs) + if (sess->kobj.state_in_sysfs) { + kobject_del(&sess->kobj); kobject_put(&sess->kobj); - else + } else { kfree(sess); + } } static void rtrs_srv_close_work(struct work_struct *work) @@ -1586,7 +1600,7 @@ static int create_con(struct rtrs_srv_sess *sess, struct rtrs_sess *s = &sess->s; struct rtrs_srv_con *con; - u16 cq_size, wr_queue_size; + u32 cq_size, wr_queue_size; int err, cq_vector; con = kzalloc(sizeof(*con), GFP_KERNEL); @@ -1600,7 +1614,7 @@ static int create_con(struct rtrs_srv_sess *sess, con->c.cm_id = cm_id; con->c.sess = &sess->s; con->c.cid = cid; - atomic_set(&con->wr_cnt, 0); + atomic_set(&con->wr_cnt, 1); if (con->c.cid == 0) { /* @@ -1630,7 +1644,8 @@ static int create_con(struct rtrs_srv_sess *sess, /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_size, - wr_queue_size, IB_POLL_WORKQUEUE); + wr_queue_size, wr_queue_size, + IB_POLL_WORKQUEUE); if (err) { rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); goto free_con; @@ -1781,13 +1796,9 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, goto reject_w_econnreset; } recon_cnt = le16_to_cpu(msg->recon_cnt); - srv = get_or_create_srv(ctx, &msg->paths_uuid); - /* - * "refcount == 0" happens if a previous thread calls get_or_create_srv - * allocate srv, but chunks of srv are not allocated yet. - */ - if (!srv || refcount_read(&srv->refcount) == 0) { - err = -ENOMEM; + srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); + if (IS_ERR(srv)) { + err = PTR_ERR(srv); goto reject_w_err; } mutex_lock(&srv->paths_mutex); @@ -1862,8 +1873,8 @@ static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, return rtrs_rdma_do_reject(cm_id, -ECONNRESET); close_and_return_err: - close_sess(sess); mutex_unlock(&srv->paths_mutex); + close_sess(sess); return err; } diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index 2e3a849e0a77..d13aff0aa816 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -182,16 +182,16 @@ int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, u32 imm_data, enum ib_send_flags flags, struct ib_send_wr *head) { - struct ib_send_wr wr; + struct ib_rdma_wr wr; - wr = (struct ib_send_wr) { - .wr_cqe = cqe, - .send_flags = flags, - .opcode = IB_WR_RDMA_WRITE_WITH_IMM, - .ex.imm_data = cpu_to_be32(imm_data), + wr = (struct ib_rdma_wr) { + .wr.wr_cqe = cqe, + .wr.send_flags = flags, + .wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM, + .wr.ex.imm_data = cpu_to_be32(imm_data), }; - return rtrs_post_send(con->qp, head, &wr); + return rtrs_post_send(con->qp, head, &wr.wr); } EXPORT_SYMBOL_GPL(rtrs_post_rdma_write_imm_empty); @@ -231,14 +231,14 @@ static int create_cq(struct rtrs_con *con, int cq_vector, u16 cq_size, } static int create_qp(struct rtrs_con *con, struct ib_pd *pd, - u16 wr_queue_size, u32 max_sge) + u32 max_send_wr, u32 max_recv_wr, u32 max_sge) { struct ib_qp_init_attr init_attr = {NULL}; struct rdma_cm_id *cm_id = con->cm_id; int ret; - init_attr.cap.max_send_wr = wr_queue_size; - init_attr.cap.max_recv_wr = wr_queue_size; + init_attr.cap.max_send_wr = max_send_wr; + init_attr.cap.max_recv_wr = max_recv_wr; init_attr.cap.max_recv_sge = 1; init_attr.event_handler = qp_event_handler; init_attr.qp_context = con; @@ -260,8 +260,9 @@ static int create_qp(struct rtrs_con *con, struct ib_pd *pd, } int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, - u32 max_send_sge, int cq_vector, u16 cq_size, - u16 wr_queue_size, enum ib_poll_context poll_ctx) + u32 max_send_sge, int cq_vector, int cq_size, + u32 max_send_wr, u32 max_recv_wr, + enum ib_poll_context poll_ctx) { int err; @@ -269,7 +270,8 @@ int rtrs_cq_qp_create(struct rtrs_sess *sess, struct rtrs_con *con, if (err) return err; - err = create_qp(con, sess->dev->ib_pd, wr_queue_size, max_send_sge); + err = create_qp(con, sess->dev->ib_pd, max_send_wr, max_recv_wr, + max_send_sge); if (err) { ib_free_cq(con->cq); con->cq = NULL; @@ -308,7 +310,7 @@ void rtrs_send_hb_ack(struct rtrs_sess *sess) imm = rtrs_to_imm(RTRS_HB_ACK_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; @@ -337,7 +339,7 @@ static void hb_work(struct work_struct *work) } imm = rtrs_to_imm(RTRS_HB_MSG_IMM, 0); err = rtrs_post_rdma_write_imm_empty(usr_con, sess->hb_cqe, imm, - IB_SEND_SIGNALED, NULL); + 0, NULL); if (err) { sess->hb_err_handler(usr_con); return; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 5492b66a8153..31f8aa2c40ed 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3628,7 +3628,7 @@ static ssize_t srp_create_target(struct device *dev, struct srp_rdma_ch *ch; struct srp_device *srp_dev = host->srp_dev; struct ib_device *ibdev = srp_dev->dev; - int ret, node_idx, node, cpu, i; + int ret, i, ch_idx; unsigned int max_sectors_per_mr, mr_per_cmd = 0; bool multich = false; uint32_t max_iu_len; @@ -3753,81 +3753,61 @@ static ssize_t srp_create_target(struct device *dev, goto out; ret = -ENOMEM; - if (target->ch_count == 0) + if (target->ch_count == 0) { target->ch_count = - max_t(unsigned int, num_online_nodes(), - min(ch_count ?: - min(4 * num_online_nodes(), - ibdev->num_comp_vectors), - num_online_cpus())); + min(ch_count ?: + max(4 * num_online_nodes(), + ibdev->num_comp_vectors), + num_online_cpus()); + } + target->ch = kcalloc(target->ch_count, sizeof(*target->ch), GFP_KERNEL); if (!target->ch) goto out; - node_idx = 0; - for_each_online_node(node) { - const int ch_start = (node_idx * target->ch_count / - num_online_nodes()); - const int ch_end = ((node_idx + 1) * target->ch_count / - num_online_nodes()); - const int cv_start = node_idx * ibdev->num_comp_vectors / - num_online_nodes(); - const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors / - num_online_nodes(); - int cpu_idx = 0; - - for_each_online_cpu(cpu) { - if (cpu_to_node(cpu) != node) - continue; - if (ch_start + cpu_idx >= ch_end) - continue; - ch = &target->ch[ch_start + cpu_idx]; - ch->target = target; - ch->comp_vector = cv_start == cv_end ? cv_start : - cv_start + cpu_idx % (cv_end - cv_start); - spin_lock_init(&ch->lock); - INIT_LIST_HEAD(&ch->free_tx); - ret = srp_new_cm_id(ch); - if (ret) - goto err_disconnect; + for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) { + ch = &target->ch[ch_idx]; + ch->target = target; + ch->comp_vector = ch_idx % ibdev->num_comp_vectors; + spin_lock_init(&ch->lock); + INIT_LIST_HEAD(&ch->free_tx); + ret = srp_new_cm_id(ch); + if (ret) + goto err_disconnect; - ret = srp_create_ch_ib(ch); - if (ret) - goto err_disconnect; + ret = srp_create_ch_ib(ch); + if (ret) + goto err_disconnect; - ret = srp_alloc_req_data(ch); - if (ret) - goto err_disconnect; + ret = srp_alloc_req_data(ch); + if (ret) + goto err_disconnect; - ret = srp_connect_ch(ch, max_iu_len, multich); - if (ret) { - char dst[64]; - - if (target->using_rdma_cm) - snprintf(dst, sizeof(dst), "%pIS", - &target->rdma_cm.dst); - else - snprintf(dst, sizeof(dst), "%pI6", - target->ib_cm.orig_dgid.raw); - shost_printk(KERN_ERR, target->scsi_host, - PFX "Connection %d/%d to %s failed\n", - ch_start + cpu_idx, - target->ch_count, dst); - if (node_idx == 0 && cpu_idx == 0) { - goto free_ch; - } else { - srp_free_ch_ib(target, ch); - srp_free_req_data(target, ch); - target->ch_count = ch - target->ch; - goto connected; - } - } + ret = srp_connect_ch(ch, max_iu_len, multich); + if (ret) { + char dst[64]; - multich = true; - cpu_idx++; + if (target->using_rdma_cm) + snprintf(dst, sizeof(dst), "%pIS", + &target->rdma_cm.dst); + else + snprintf(dst, sizeof(dst), "%pI6", + target->ib_cm.orig_dgid.raw); + shost_printk(KERN_ERR, target->scsi_host, + PFX "Connection %d/%d to %s failed\n", + ch_idx, + target->ch_count, dst); + if (ch_idx == 0) { + goto free_ch; + } else { + srp_free_ch_ib(target, ch); + srp_free_req_data(target, ch); + target->ch_count = ch - target->ch; + goto connected; + } } - node_idx++; + multich = true; } connected: diff --git a/drivers/input/joydev.c b/drivers/input/joydev.c index a2b5fbba2d3b..430dc6975004 100644 --- a/drivers/input/joydev.c +++ b/drivers/input/joydev.c @@ -456,7 +456,7 @@ static int joydev_handle_JSIOCSAXMAP(struct joydev *joydev, if (IS_ERR(abspam)) return PTR_ERR(abspam); - for (i = 0; i < joydev->nabs; i++) { + for (i = 0; i < len && i < joydev->nabs; i++) { if (abspam[i] > ABS_MAX) { retval = -EINVAL; goto out; @@ -480,6 +480,9 @@ static int joydev_handle_JSIOCSBTNMAP(struct joydev *joydev, int i; int retval = 0; + if (len % sizeof(*keypam)) + return -EINVAL; + len = min(len, sizeof(joydev->keypam)); /* Validate the map. */ @@ -487,7 +490,7 @@ static int joydev_handle_JSIOCSBTNMAP(struct joydev *joydev, if (IS_ERR(keypam)) return PTR_ERR(keypam); - for (i = 0; i < joydev->nkey; i++) { + for (i = 0; i < (len / 2) && i < joydev->nkey; i++) { if (keypam[i] > KEY_MAX || keypam[i] < BTN_MISC) { retval = -EINVAL; goto out; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 8cc8ca4a9ac0..9f0d07dcbf06 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -305,6 +305,7 @@ static const struct xpad_device { { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 }, { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 }, { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE }, + { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE }, { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 }, { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE }, { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig index 2b321c17054a..94eab82086b2 100644 --- a/drivers/input/keyboard/Kconfig +++ b/drivers/input/keyboard/Kconfig @@ -446,7 +446,7 @@ config KEYBOARD_MPR121 config KEYBOARD_SNVS_PWRKEY tristate "IMX SNVS Power Key Driver" - depends on ARCH_MXC || COMPILE_TEST + depends on ARCH_MXC || (COMPILE_TEST && HAS_IOMEM) depends on OF help This is the snvs powerkey driver for the Freescale i.MX application diff --git a/drivers/input/keyboard/applespi.c b/drivers/input/keyboard/applespi.c index d22223154177..27e87c45edf2 100644 --- a/drivers/input/keyboard/applespi.c +++ b/drivers/input/keyboard/applespi.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include @@ -409,7 +410,7 @@ struct applespi_data { unsigned int cmd_msg_cntr; /* lock to protect the above parameters and flags below */ spinlock_t cmd_msg_lock; - bool cmd_msg_queued; + ktime_t cmd_msg_queued; enum applespi_evt_type cmd_evt_type; struct led_classdev backlight_info; @@ -729,7 +730,7 @@ static void applespi_msg_complete(struct applespi_data *applespi, wake_up_all(&applespi->drain_complete); if (is_write_msg) { - applespi->cmd_msg_queued = false; + applespi->cmd_msg_queued = 0; applespi_send_cmd_msg(applespi); } @@ -771,8 +772,16 @@ static int applespi_send_cmd_msg(struct applespi_data *applespi) return 0; /* check whether send is in progress */ - if (applespi->cmd_msg_queued) - return 0; + if (applespi->cmd_msg_queued) { + if (ktime_ms_delta(ktime_get(), applespi->cmd_msg_queued) < 1000) + return 0; + + dev_warn(&applespi->spi->dev, "Command %d timed out\n", + applespi->cmd_evt_type); + + applespi->cmd_msg_queued = 0; + applespi->write_active = false; + } /* set up packet */ memset(packet, 0, APPLESPI_PACKET_SIZE); @@ -869,7 +878,7 @@ static int applespi_send_cmd_msg(struct applespi_data *applespi) return sts; } - applespi->cmd_msg_queued = true; + applespi->cmd_msg_queued = ktime_get_coarse(); applespi->write_active = true; return 0; @@ -1921,7 +1930,7 @@ static int __maybe_unused applespi_resume(struct device *dev) applespi->drain = false; applespi->have_cl_led_on = false; applespi->have_bl_level = 0; - applespi->cmd_msg_queued = false; + applespi->cmd_msg_queued = 0; applespi->read_active = false; applespi->write_active = false; diff --git a/drivers/input/misc/da7280.c b/drivers/input/misc/da7280.c index 37568b00873d..b08610d6e575 100644 --- a/drivers/input/misc/da7280.c +++ b/drivers/input/misc/da7280.c @@ -863,6 +863,7 @@ static void da7280_parse_properties(struct device *dev, gpi_str3[7] = '0' + i; haptics->gpi_ctl[i].polarity = 0; error = device_property_read_string(dev, gpi_str3, &str); + if (!error) haptics->gpi_ctl[i].polarity = da7280_haptic_of_gpi_pol_str(dev, str); } @@ -1299,11 +1300,13 @@ static int __maybe_unused da7280_resume(struct device *dev) return retval; } +#ifdef CONFIG_OF static const struct of_device_id da7280_of_match[] = { { .compatible = "dlg,da7280", }, { } }; MODULE_DEVICE_TABLE(of, da7280_of_match); +#endif static const struct i2c_device_id da7280_i2c_id[] = { { "da7280", }, diff --git a/drivers/input/serio/i8042-x86ia64io.h b/drivers/input/serio/i8042-x86ia64io.h index c74b020796a9..9119e12a5778 100644 --- a/drivers/input/serio/i8042-x86ia64io.h +++ b/drivers/input/serio/i8042-x86ia64io.h @@ -588,6 +588,10 @@ static const struct dmi_system_id i8042_dmi_noselftest_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */ }, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */ + }, }, { } }; diff --git a/drivers/input/serio/serport.c b/drivers/input/serio/serport.c index 8ac970a423de..33e9d9bfd036 100644 --- a/drivers/input/serio/serport.c +++ b/drivers/input/serio/serport.c @@ -156,7 +156,9 @@ static void serport_ldisc_receive(struct tty_struct *tty, const unsigned char *c * returning 0 characters. */ -static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, unsigned char __user * buf, size_t nr) +static ssize_t serport_ldisc_read(struct tty_struct * tty, struct file * file, + unsigned char *kbuf, size_t nr, + void **cookie, unsigned long offset) { struct serport *serport = (struct serport*) tty->disc_data; struct serio *serio; diff --git a/drivers/input/touchscreen/elo.c b/drivers/input/touchscreen/elo.c index e0bacd34866a..96173232e53f 100644 --- a/drivers/input/touchscreen/elo.c +++ b/drivers/input/touchscreen/elo.c @@ -341,8 +341,10 @@ static int elo_connect(struct serio *serio, struct serio_driver *drv) switch (elo->id) { case 0: /* 10-byte protocol */ - if (elo_setup_10(elo)) + if (elo_setup_10(elo)) { + err = -EIO; goto fail3; + } break; diff --git a/drivers/input/touchscreen/raydium_i2c_ts.c b/drivers/input/touchscreen/raydium_i2c_ts.c index 603a948460d6..4d2d22a86977 100644 --- a/drivers/input/touchscreen/raydium_i2c_ts.c +++ b/drivers/input/touchscreen/raydium_i2c_ts.c @@ -445,6 +445,7 @@ static int raydium_i2c_write_object(struct i2c_client *client, enum raydium_bl_ack state) { int error; + static const u8 cmd[] = { 0xFF, 0x39 }; error = raydium_i2c_send(client, RM_CMD_BOOT_WRT, data, len); if (error) { @@ -453,7 +454,7 @@ static int raydium_i2c_write_object(struct i2c_client *client, return error; } - error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, NULL, 0); + error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, cmd, sizeof(cmd)); if (error) { dev_err(&client->dev, "Ack obj command failed: %d\n", error); return error; diff --git a/drivers/input/touchscreen/st1232.c b/drivers/input/touchscreen/st1232.c index b4e7bcbe9b91..6abae665ca71 100644 --- a/drivers/input/touchscreen/st1232.c +++ b/drivers/input/touchscreen/st1232.c @@ -94,8 +94,13 @@ static int st1232_ts_wait_ready(struct st1232_ts_data *ts) for (retries = 10; retries; retries--) { error = st1232_ts_read_data(ts, REG_STATUS, 1); - if (!error && ts->read_buf[0] == (STATUS_NORMAL | ERROR_NONE)) - return 0; + if (!error) { + switch (ts->read_buf[0]) { + case STATUS_NORMAL | ERROR_NONE: + case STATUS_IDLE | ERROR_NONE: + return 0; + } + } usleep_range(1000, 2000); } diff --git a/drivers/input/touchscreen/sur40.c b/drivers/input/touchscreen/sur40.c index 620cdd7d214a..12f2562b0141 100644 --- a/drivers/input/touchscreen/sur40.c +++ b/drivers/input/touchscreen/sur40.c @@ -787,6 +787,7 @@ static int sur40_probe(struct usb_interface *interface, dev_err(&interface->dev, "Unable to register video controls."); v4l2_ctrl_handler_free(&sur40->hdl); + error = sur40->hdl.error; goto err_unreg_v4l2; } diff --git a/drivers/input/touchscreen/zinitix.c b/drivers/input/touchscreen/zinitix.c index a3e3adbabc67..b1548971d683 100644 --- a/drivers/input/touchscreen/zinitix.c +++ b/drivers/input/touchscreen/zinitix.c @@ -190,7 +190,7 @@ static int zinitix_write_cmd(struct i2c_client *client, u16 reg) return 0; } -static bool zinitix_init_touch(struct bt541_ts_data *bt541) +static int zinitix_init_touch(struct bt541_ts_data *bt541) { struct i2c_client *client = bt541->client; int i; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 83d8ab2aed9f..398909dab640 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -254,6 +255,8 @@ static enum iommu_init_state init_state = IOMMU_START_STATE; static int amd_iommu_enable_interrupts(void); static int __init iommu_go_to_state(enum iommu_init_state state); static void init_device_table_dma(void); +static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, + u8 fxn, u64 *value, bool is_write); static bool amd_iommu_pre_enabled = true; @@ -1712,13 +1715,11 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - -static void init_iommu_perf_ctr(struct amd_iommu *iommu) +static void __init init_iommu_perf_ctr(struct amd_iommu *iommu) { + int retry; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; + u64 val = 0xabcd, val2 = 0, save_reg, save_src; if (!iommu_feature(iommu, FEATURE_PC)) return; @@ -1726,17 +1727,39 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) amd_iommu_pc_present = true; /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, false)) goto pc_false; - /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) + /* + * Disable power gating by programing the performance counter + * source to 20 (i.e. counts the reads and writes from/to IOMMU + * Reserved Register [MMIO Offset 1FF8h] that are ignored.), + * which never get incremented during this init phase. + * (Note: The event is also deprecated.) + */ + val = 20; + if (iommu_pc_get_set_reg(iommu, 0, 0, 8, &val, true)) goto pc_false; + /* Check if the performance counters can be written to */ + val = 0xabcd; + for (retry = 5; retry; retry--) { + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false) || + val2) + break; + + /* Wait about 20 msec for power gating to disable and retry. */ + msleep(20); + } + /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) + if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true) || + iommu_pc_get_set_reg(iommu, 0, 0, 8, &save_src, true)) + goto pc_false; + + if (val != val2) goto pc_false; pci_info(pdev, "IOMMU performance counters supported\n"); @@ -2689,7 +2712,6 @@ static int __init early_amd_iommu_init(void) struct acpi_table_header *ivrs_base; acpi_status status; int i, remap_cache_sz, ret = 0; - u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2779,16 +2801,6 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; - /* Disable IOMMU if there's Stoney Ridge graphics */ - for (i = 0; i < 32; i++) { - pci_id = read_pci_config(0, i, 0, 0); - if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { - pr_info("Disable IOMMU on Stoney Ridge\n"); - amd_iommu_disabled = true; - break; - } - } - /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2856,6 +2868,7 @@ static bool detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; + int i; status = acpi_get_table("IVRS", 0, &ivrs_base); if (status == AE_NOT_FOUND) @@ -2868,6 +2881,17 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + /* Don't use IOMMU if there is Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + u32 pci_id; + + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + return false; + } + } + /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); @@ -2894,12 +2918,12 @@ static int __init state_next(void) } break; case IOMMU_IVRS_DETECTED: - ret = early_amd_iommu_init(); - init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; - if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled\n"); + if (amd_iommu_disabled) { init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; + } else { + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; } break; case IOMMU_ACPI_FINISHED: @@ -2974,11 +2998,17 @@ int __init amd_iommu_prepare(void) { int ret; + if (amd_iommu_disabled) + return -ENODEV; + amd_iommu_irq_remap = true; ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); - if (ret) + if (ret) { + amd_iommu_irq_remap = false; return ret; + } + return amd_iommu_irq_remap ? 0 : -ENODEV; } diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f0adbc48fd17..9256f84f5ebf 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -1502,6 +1502,10 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + spin_lock_irqsave(&domain->lock, flags); amd_iommu_domain_get_pgtable(domain, &pgtable); @@ -1513,10 +1517,6 @@ static bool increase_address_space(struct protection_domain *domain, if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) goto out; - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); pgtable.root = pte; @@ -1530,10 +1530,12 @@ static bool increase_address_space(struct protection_domain *domain, */ amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); + pte = NULL; ret = true; out: spin_unlock_irqrestore(&domain->lock, flags); + free_page((unsigned long)pte); return ret; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 8ca7415d785d..c70d6e79f534 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -2280,7 +2280,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start, + arm_smmu_tlb_inv_range(gather->start, gather->end - gather->start + 1, gather->pgsize, true, smmu_domain); } diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index bcda17012aee..abb1d2f4ce30 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -206,6 +206,8 @@ static int qcom_smmu_cfg_probe(struct arm_smmu_device *smmu) smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i)); if (FIELD_GET(ARM_SMMU_SMR_VALID, smr)) { + /* Ignore valid bit for SMR mask extraction. */ + smr &= ~ARM_SMMU_SMR_VALID; smmu->smrs[i].id = FIELD_GET(ARM_SMMU_SMR_ID, smr); smmu->smrs[i].mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr); smmu->smrs[i].valid = true; diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4078358ed66e..00fbc591a142 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -309,6 +309,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) domain->ops->flush_iotlb_all(domain); } +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -363,8 +368,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); - if (!cookie->fq_domain && !iommu_domain_get_attr(domain, - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { + if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) && + !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && + attr) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, iommu_dma_entry_dtor)) pr_warn("iova flush queue initialization failed\n"); @@ -521,11 +527,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, iova_align(iovad, size), dir, attrs); } -static bool dev_is_untrusted(struct device *dev) -{ - return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; -} - static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, u64 dma_mask) { diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 97dfcffbf495..444c0bec221a 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -30,8 +30,8 @@ #define VCMD_VRSP_IP 0x1 #define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) #define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 -#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 2 +#define VCMD_VRSP_SC_INVALID_PASID 2 #define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) #define VCMD_CMD_OPERAND(e) ((e) << 8) /* diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 18a9f05df407..b3bcd6dec93e 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -1079,8 +1079,17 @@ static irqreturn_t prq_event_thread(int irq, void *d) * Clear the page request overflow bit and wake up all threads that * are waiting for the completion of this handling. */ - if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) - writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { + pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", + iommu->name); + head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; + tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; + if (head == tail) { + writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); + pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", + iommu->name); + } + } if (!completion_done(&iommu->prq_complete)) complete(&iommu->prq_complete); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index ffeebda8d6de..fd5f59373fc6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2426,9 +2426,6 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain); - /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); @@ -2438,18 +2435,31 @@ static int __iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } +static int _iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + const struct iommu_ops *ops = domain->ops; + int ret; + + ret = __iommu_map(domain, iova, paddr, size, prot, gfp); + if (ret == 0 && ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + + return ret; +} + int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { might_sleep(); - return __iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); + return _iommu_map(domain, iova, paddr, size, prot, GFP_KERNEL); } EXPORT_SYMBOL_GPL(iommu_map); int iommu_map_atomic(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { - return __iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); + return _iommu_map(domain, iova, paddr, size, prot, GFP_ATOMIC); } EXPORT_SYMBOL_GPL(iommu_map_atomic); @@ -2533,6 +2543,7 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, unsigned int nents, int prot, gfp_t gfp) { + const struct iommu_ops *ops = domain->ops; size_t len = 0, mapped = 0; phys_addr_t start; unsigned int i = 0; @@ -2563,6 +2574,8 @@ static size_t __iommu_map_sg(struct iommu_domain *domain, unsigned long iova, sg = sg_next(sg); } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); return mapped; out_err: diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 8e56cec532e7..bfe6ec329f8d 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -444,7 +444,7 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather) { struct mtk_iommu_data *data = mtk_iommu_get_m4u_data(); - size_t length = gather->end - gather->start; + size_t length = gather->end - gather->start + 1; if (gather->start == ULONG_MAX) return; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4a3f095a1c26..602aab98c079 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -798,10 +798,69 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova); } +static struct tegra_smmu *tegra_smmu_find(struct device_node *np) +{ + struct platform_device *pdev; + struct tegra_mc *mc; + + pdev = of_find_device_by_node(np); + if (!pdev) + return NULL; + + mc = platform_get_drvdata(pdev); + if (!mc) + return NULL; + + return mc->smmu; +} + +static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, + struct of_phandle_args *args) +{ + const struct iommu_ops *ops = smmu->iommu.ops; + int err; + + err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + if (err < 0) { + dev_err(dev, "failed to initialize fwspec: %d\n", err); + return err; + } + + err = ops->of_xlate(dev, args); + if (err < 0) { + dev_err(dev, "failed to parse SW group ID: %d\n", err); + iommu_fwspec_free(dev); + return err; + } + + return 0; +} + static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { - struct tegra_smmu *smmu = dev_iommu_priv_get(dev); + struct device_node *np = dev->of_node; + struct tegra_smmu *smmu = NULL; + struct of_phandle_args args; + unsigned int index = 0; + int err; + + while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args) == 0) { + smmu = tegra_smmu_find(args.np); + if (smmu) { + err = tegra_smmu_configure(smmu, dev, &args); + if (err < 0) { + of_node_put(args.np); + return ERR_PTR(err); + } + } + + of_node_put(args.np); + index++; + } + + smmu = dev_iommu_priv_get(dev); if (!smmu) return ERR_PTR(-ENODEV); @@ -1028,6 +1087,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (!smmu) return ERR_PTR(-ENOMEM); + /* + * This is a bit of a hack. Ideally we'd want to simply return this + * value. However the IOMMU registration process will attempt to add + * all devices to the IOMMU when bus_set_iommu() is called. In order + * not to rely on global variables to track the IOMMU instance, we + * set it here so that it can be looked up from the .probe_device() + * callback via the IOMMU device's .drvdata field. + */ + mc->smmu = smmu; + size = BITS_TO_LONGS(soc->num_asids) * sizeof(long); smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL); diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index b147f22a78f4..d7d1a0fab2c1 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -457,7 +457,8 @@ config IMX_IRQSTEER Support for the i.MX IRQSTEER interrupt multiplexer/remapper. config IMX_INTMUX - def_bool y if ARCH_MXC || COMPILE_TEST + bool "i.MX INTMUX support" if COMPILE_TEST + default y if ARCH_MXC select IRQ_DOMAIN help Support for the i.MX INTMUX interrupt multiplexer. diff --git a/drivers/irqchip/irq-ingenic-tcu.c b/drivers/irqchip/irq-ingenic-tcu.c index 7a7222d4c19c..b938d1d04d96 100644 --- a/drivers/irqchip/irq-ingenic-tcu.c +++ b/drivers/irqchip/irq-ingenic-tcu.c @@ -179,5 +179,6 @@ static int __init ingenic_tcu_irq_init(struct device_node *np, } IRQCHIP_DECLARE(jz4740_tcu_irq, "ingenic,jz4740-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(jz4725b_tcu_irq, "ingenic,jz4725b-tcu", ingenic_tcu_irq_init); +IRQCHIP_DECLARE(jz4760_tcu_irq, "ingenic,jz4760-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(jz4770_tcu_irq, "ingenic,jz4770-tcu", ingenic_tcu_irq_init); IRQCHIP_DECLARE(x1000_tcu_irq, "ingenic,x1000-tcu", ingenic_tcu_irq_init); diff --git a/drivers/irqchip/irq-ingenic.c b/drivers/irqchip/irq-ingenic.c index b61a8901ef72..ea36bb00be80 100644 --- a/drivers/irqchip/irq-ingenic.c +++ b/drivers/irqchip/irq-ingenic.c @@ -155,6 +155,7 @@ static int __init intc_2chip_of_init(struct device_node *node, { return ingenic_intc_of_init(node, 2); } +IRQCHIP_DECLARE(jz4760_intc, "ingenic,jz4760-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4770_intc, "ingenic,jz4770-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4775_intc, "ingenic,jz4775-intc", intc_2chip_of_init); IRQCHIP_DECLARE(jz4780_intc, "ingenic,jz4780-intc", intc_2chip_of_init); diff --git a/drivers/irqchip/irq-loongson-pch-msi.c b/drivers/irqchip/irq-loongson-pch-msi.c index 12aeeab43289..32562b7e681b 100644 --- a/drivers/irqchip/irq-loongson-pch-msi.c +++ b/drivers/irqchip/irq-loongson-pch-msi.c @@ -225,7 +225,7 @@ static int pch_msi_init(struct device_node *node, goto err_priv; } - priv->msi_map = bitmap_alloc(priv->num_irqs, GFP_KERNEL); + priv->msi_map = bitmap_zalloc(priv->num_irqs, GFP_KERNEL); if (!priv->msi_map) { ret = -ENOMEM; goto err_priv; diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c index f94f974a8764..853b3972dbe7 100644 --- a/drivers/irqchip/irq-ls-extirq.c +++ b/drivers/irqchip/irq-ls-extirq.c @@ -64,7 +64,7 @@ static struct irq_chip ls_extirq_chip = { .irq_set_type = ls_extirq_set_type, .irq_retrigger = irq_chip_retrigger_hierarchy, .irq_set_affinity = irq_chip_set_affinity_parent, - .flags = IRQCHIP_SET_TYPE_MASKED, + .flags = IRQCHIP_SET_TYPE_MASKED | IRQCHIP_SKIP_SET_WAKE, }; static int diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c index 0ee327249150..2633bc254935 100644 --- a/drivers/macintosh/adb-iop.c +++ b/drivers/macintosh/adb-iop.c @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -249,7 +250,7 @@ static void adb_iop_set_ap_complete(struct iop_msg *msg) { struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message; - autopoll_devs = (amsg->data[1] << 8) | amsg->data[0]; + autopoll_devs = get_unaligned_be16(amsg->data); if (autopoll_devs & (1 << autopoll_addr)) return; autopoll_addr = autopoll_devs ? (ffs(autopoll_devs) - 1) : 0; @@ -266,8 +267,7 @@ static int adb_iop_autopoll(int devs) amsg.flags = ADB_IOP_SET_AUTOPOLL | (mask ? ADB_IOP_AUTOPOLL : 0); amsg.count = 2; amsg.cmd = 0; - amsg.data[0] = mask & 0xFF; - amsg.data[1] = (mask >> 8) & 0xFF; + put_unaligned_be16(mask, amsg.data); iop_send_message(ADB_IOP, ADB_CHAN, NULL, sizeof(amsg), (__u8 *)&amsg, adb_iop_set_ap_complete); diff --git a/drivers/mailbox/arm_mhuv2.c b/drivers/mailbox/arm_mhuv2.c index 67fb10885bb4..9f71de666e3f 100644 --- a/drivers/mailbox/arm_mhuv2.c +++ b/drivers/mailbox/arm_mhuv2.c @@ -699,7 +699,9 @@ static irqreturn_t mhuv2_receiver_interrupt(int irq, void *arg) ret = IRQ_HANDLED; } - kfree(data); + if (!IS_ERR(data)) + kfree(data); + return ret; } diff --git a/drivers/mailbox/sprd-mailbox.c b/drivers/mailbox/sprd-mailbox.c index f6fab24ae8a9..4c325301a2fe 100644 --- a/drivers/mailbox/sprd-mailbox.c +++ b/drivers/mailbox/sprd-mailbox.c @@ -35,7 +35,7 @@ #define SPRD_MBOX_IRQ_CLR BIT(0) /* Bit and mask definiation for outbox's SPRD_MBOX_FIFO_STS register */ -#define SPRD_OUTBOX_FIFO_FULL BIT(0) +#define SPRD_OUTBOX_FIFO_FULL BIT(2) #define SPRD_OUTBOX_FIFO_WR_SHIFT 16 #define SPRD_OUTBOX_FIFO_RD_SHIFT 24 #define SPRD_OUTBOX_FIFO_POS_MASK GENMASK(7, 0) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 1d57f48307e6..e8bf4f752e8b 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -1001,6 +1001,7 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent); extern struct workqueue_struct *bcache_wq; extern struct workqueue_struct *bch_journal_wq; +extern struct workqueue_struct *bch_flush_wq; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; @@ -1042,5 +1043,7 @@ void bch_debug_exit(void); void bch_debug_init(void); void bch_request_exit(void); int bch_request_init(void); +void bch_btree_exit(void); +int bch_btree_init(void); #endif /* _BCACHE_H */ diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 910df242c83d..fe6dce125aba 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -99,6 +99,8 @@ #define PTR_HASH(c, k) \ (((k)->ptr[0] >> c->bucket_bits) | PTR_GEN(k, 0)) +static struct workqueue_struct *btree_io_wq; + #define insert_lock(s, b) ((b)->level <= (s)->lock) @@ -308,7 +310,7 @@ static void __btree_node_write_done(struct closure *cl) btree_complete_write(b, w); if (btree_node_dirty(b)) - schedule_delayed_work(&b->work, 30 * HZ); + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); closure_return_with_destructor(cl, btree_node_write_unlock); } @@ -481,7 +483,7 @@ static void bch_btree_leaf_dirty(struct btree *b, atomic_t *journal_ref) BUG_ON(!i->keys); if (!btree_node_dirty(b)) - schedule_delayed_work(&b->work, 30 * HZ); + queue_delayed_work(btree_io_wq, &b->work, 30 * HZ); set_btree_node_dirty(b); @@ -2764,3 +2766,18 @@ void bch_keybuf_init(struct keybuf *buf) spin_lock_init(&buf->lock); array_allocator_init(&buf->freelist); } + +void bch_btree_exit(void) +{ + if (btree_io_wq) + destroy_workqueue(btree_io_wq); +} + +int __init bch_btree_init(void) +{ + btree_io_wq = alloc_workqueue("bch_btree_io", WQ_MEM_RECLAIM, 0); + if (!btree_io_wq) + return -ENOMEM; + + return 0; +} diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index aefbdb7e003b..c6613e817333 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -932,8 +932,8 @@ atomic_t *bch_journal(struct cache_set *c, journal_try_write(c); } else if (!w->dirty) { w->dirty = true; - schedule_delayed_work(&c->journal.work, - msecs_to_jiffies(c->journal_delay_ms)); + queue_delayed_work(bch_flush_wq, &c->journal.work, + msecs_to_jiffies(c->journal_delay_ms)); spin_unlock(&c->journal.lock); } else { spin_unlock(&c->journal.lock); diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 2047a9cccdb5..7457ec160c9a 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -49,6 +49,7 @@ static int bcache_major; static DEFINE_IDA(bcache_device_idx); static wait_queue_head_t unregister_wait; struct workqueue_struct *bcache_wq; +struct workqueue_struct *bch_flush_wq; struct workqueue_struct *bch_journal_wq; @@ -2821,6 +2822,9 @@ static void bcache_exit(void) destroy_workqueue(bcache_wq); if (bch_journal_wq) destroy_workqueue(bch_journal_wq); + if (bch_flush_wq) + destroy_workqueue(bch_flush_wq); + bch_btree_exit(); if (bcache_major) unregister_blkdev(bcache_major, "bcache"); @@ -2876,10 +2880,26 @@ static int __init bcache_init(void) return bcache_major; } + if (bch_btree_init()) + goto err; + bcache_wq = alloc_workqueue("bcache", WQ_MEM_RECLAIM, 0); if (!bcache_wq) goto err; + /* + * Let's not make this `WQ_MEM_RECLAIM` for the following reasons: + * + * 1. It used `system_wq` before which also does no memory reclaim. + * 2. With `WQ_MEM_RECLAIM` desktop stalls, increased boot times, and + * reduced throughput can be observed. + * + * We still want to user our own queue to not congest the `system_wq`. + */ + bch_flush_wq = alloc_workqueue("bch_flush", 0, 0); + if (!bch_flush_wq) + goto err; + bch_journal_wq = alloc_workqueue("bch_journal", WQ_MEM_RECLAIM, 0); if (!bch_journal_wq) goto err; diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index fce4cbf9529d..50f3e673729c 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size); sector_t dm_bufio_get_device_size(struct dm_bufio_client *c) { sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT; + if (s >= c->start) + s -= c->start; + else + s = 0; if (likely(c->sectors_per_block_bits >= 0)) s >>= c->sectors_per_block_bits; else diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 086d293c2b03..2576c966a009 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -102,6 +102,10 @@ struct mapped_device { /* kobject and completion */ struct dm_kobject_holder kobj_holder; + int swap_bios; + struct semaphore swap_bios_semaphore; + struct mutex swap_bios_lock; + struct dm_stats stats; /* for blk-mq request-based DM support */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 5a55617a08e6..07aa619d36e7 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -3324,6 +3324,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) wake_up_process(cc->write_thread); ti->num_flush_bios = 1; + ti->limit_swap_bios = true; return 0; diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index b24e3839bb3a..d9ac7372108c 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -47,6 +47,7 @@ struct writeset { static void writeset_free(struct writeset *ws) { vfree(ws->bits); + ws->bits = NULL; } static int setup_on_disk_bitset(struct dm_disk_bitset *info, @@ -71,8 +72,6 @@ static size_t bitset_size(unsigned nr_bits) */ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) { - ws->md.nr_bits = nr_blocks; - ws->md.root = INVALID_WRITESET_ROOT; ws->bits = vzalloc(bitset_size(nr_blocks)); if (!ws->bits) { DMERR("%s: couldn't allocate in memory bitset", __func__); @@ -85,12 +84,14 @@ static int writeset_alloc(struct writeset *ws, dm_block_t nr_blocks) /* * Wipes the in-core bitset, and creates a new on disk bitset. */ -static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws) +static int writeset_init(struct dm_disk_bitset *info, struct writeset *ws, + dm_block_t nr_blocks) { int r; - memset(ws->bits, 0, bitset_size(ws->md.nr_bits)); + memset(ws->bits, 0, bitset_size(nr_blocks)); + ws->md.nr_bits = nr_blocks; r = setup_on_disk_bitset(info, ws->md.nr_bits, &ws->md.root); if (r) { DMERR("%s: setup_on_disk_bitset failed", __func__); @@ -134,7 +135,7 @@ static int writeset_test_and_set(struct dm_disk_bitset *info, { int r; - if (!test_and_set_bit(block, ws->bits)) { + if (!test_bit(block, ws->bits)) { r = dm_bitset_set_bit(info, ws->md.root, block, &ws->md.root); if (r) { /* FIXME: fail mode */ @@ -388,7 +389,7 @@ static void ws_dec(void *context, const void *value) static int ws_eq(void *context, const void *value1, const void *value2) { - return !memcmp(value1, value2, sizeof(struct writeset_metadata)); + return !memcmp(value1, value2, sizeof(struct writeset_disk)); } /*----------------------------------------------------------------*/ @@ -564,6 +565,15 @@ static int open_metadata(struct era_metadata *md) } disk = dm_block_data(sblock); + + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk->data_block_size) != md->block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk->data_block_size), md->block_size); + r = -EINVAL; + goto bad; + } + r = dm_tm_open_with_sm(md->bm, SUPERBLOCK_LOCATION, disk->metadata_space_map_root, sizeof(disk->metadata_space_map_root), @@ -575,10 +585,10 @@ static int open_metadata(struct era_metadata *md) setup_infos(md); - md->block_size = le32_to_cpu(disk->data_block_size); md->nr_blocks = le32_to_cpu(disk->nr_blocks); md->current_era = le32_to_cpu(disk->current_era); + ws_unpack(&disk->current_writeset, &md->current_writeset->md); md->writeset_tree_root = le64_to_cpu(disk->writeset_tree_root); md->era_array_root = le64_to_cpu(disk->era_array_root); md->metadata_snap = le64_to_cpu(disk->metadata_snap); @@ -746,6 +756,12 @@ static int metadata_digest_lookup_writeset(struct era_metadata *md, ws_unpack(&disk, &d->writeset); d->value = cpu_to_le32(key); + /* + * We initialise another bitset info to avoid any caching side effects + * with the previous one. + */ + dm_disk_bitset_init(md->tm, &d->info); + d->nr_bits = min(d->writeset.nr_bits, md->nr_blocks); d->current_bit = 0; d->step = metadata_digest_transcribe_writeset; @@ -759,12 +775,6 @@ static int metadata_digest_start(struct era_metadata *md, struct digest *d) return 0; memset(d, 0, sizeof(*d)); - - /* - * We initialise another bitset info to avoid any caching side - * effects with the previous one. - */ - dm_disk_bitset_init(md->tm, &d->info); d->step = metadata_digest_lookup_writeset; return 0; @@ -802,6 +812,8 @@ static struct era_metadata *metadata_open(struct block_device *bdev, static void metadata_close(struct era_metadata *md) { + writeset_free(&md->writesets[0]); + writeset_free(&md->writesets[1]); destroy_persistent_data_objects(md); kfree(md); } @@ -839,6 +851,7 @@ static int metadata_resize(struct era_metadata *md, void *arg) r = writeset_alloc(&md->writesets[1], *new_size); if (r) { DMERR("%s: writeset_alloc failed for writeset 1", __func__); + writeset_free(&md->writesets[0]); return r; } @@ -849,6 +862,8 @@ static int metadata_resize(struct era_metadata *md, void *arg) &value, &md->era_array_root); if (r) { DMERR("%s: dm_array_resize failed", __func__); + writeset_free(&md->writesets[0]); + writeset_free(&md->writesets[1]); return r; } @@ -870,7 +885,6 @@ static int metadata_era_archive(struct era_metadata *md) } ws_pack(&md->current_writeset->md, &value); - md->current_writeset->md.root = INVALID_WRITESET_ROOT; keys[0] = md->current_era; __dm_bless_for_disk(&value); @@ -882,6 +896,7 @@ static int metadata_era_archive(struct era_metadata *md) return r; } + md->current_writeset->md.root = INVALID_WRITESET_ROOT; md->archived_writesets = true; return 0; @@ -898,7 +913,7 @@ static int metadata_new_era(struct era_metadata *md) int r; struct writeset *new_writeset = next_writeset(md); - r = writeset_init(&md->bitset_info, new_writeset); + r = writeset_init(&md->bitset_info, new_writeset, md->nr_blocks); if (r) { DMERR("%s: writeset_init failed", __func__); return r; @@ -951,7 +966,7 @@ static int metadata_commit(struct era_metadata *md) int r; struct dm_block *sblock; - if (md->current_writeset->md.root != SUPERBLOCK_LOCATION) { + if (md->current_writeset->md.root != INVALID_WRITESET_ROOT) { r = dm_bitset_flush(&md->bitset_info, md->current_writeset->md.root, &md->current_writeset->md.root); if (r) { @@ -1225,8 +1240,10 @@ static void process_deferred_bios(struct era *era) int r; struct bio_list deferred_bios, marked_bios; struct bio *bio; + struct blk_plug plug; bool commit_needed = false; bool failed = false; + struct writeset *ws = era->md->current_writeset; bio_list_init(&deferred_bios); bio_list_init(&marked_bios); @@ -1236,9 +1253,11 @@ static void process_deferred_bios(struct era *era) bio_list_init(&era->deferred_bios); spin_unlock(&era->deferred_lock); + if (bio_list_empty(&deferred_bios)) + return; + while ((bio = bio_list_pop(&deferred_bios))) { - r = writeset_test_and_set(&era->md->bitset_info, - era->md->current_writeset, + r = writeset_test_and_set(&era->md->bitset_info, ws, get_block(era, bio)); if (r < 0) { /* @@ -1246,7 +1265,6 @@ static void process_deferred_bios(struct era *era) * FIXME: finish. */ failed = true; - } else if (r == 0) commit_needed = true; @@ -1262,9 +1280,19 @@ static void process_deferred_bios(struct era *era) if (failed) while ((bio = bio_list_pop(&marked_bios))) bio_io_error(bio); - else - while ((bio = bio_list_pop(&marked_bios))) + else { + blk_start_plug(&plug); + while ((bio = bio_list_pop(&marked_bios))) { + /* + * Only update the in-core writeset if the on-disk one + * was updated too. + */ + if (commit_needed) + set_bit(get_block(era, bio), ws->bits); submit_bio_noacct(bio); + } + blk_finish_plug(&plug); + } } static void process_rpc_calls(struct era *era) @@ -1473,15 +1501,6 @@ static int era_ctr(struct dm_target *ti, unsigned argc, char **argv) } era->md = md; - era->nr_blocks = calc_nr_blocks(era); - - r = metadata_resize(era->md, &era->nr_blocks); - if (r) { - ti->error = "couldn't resize metadata"; - era_destroy(era); - return -ENOMEM; - } - era->wq = alloc_ordered_workqueue("dm-" DM_MSG_PREFIX, WQ_MEM_RECLAIM); if (!era->wq) { ti->error = "could not create workqueue for metadata object"; @@ -1556,16 +1575,24 @@ static int era_preresume(struct dm_target *ti) dm_block_t new_size = calc_nr_blocks(era); if (era->nr_blocks != new_size) { - r = in_worker1(era, metadata_resize, &new_size); - if (r) + r = metadata_resize(era->md, &new_size); + if (r) { + DMERR("%s: metadata_resize failed", __func__); + return r; + } + + r = metadata_commit(era->md); + if (r) { + DMERR("%s: metadata_commit failed", __func__); return r; + } era->nr_blocks = new_size; } start_worker(era); - r = in_worker0(era, metadata_new_era); + r = in_worker0(era, metadata_era_rollover); if (r) { DMERR("%s: metadata_era_rollover failed", __func__); return r; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 5e306bba4375..1ca65b434f1f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -529,7 +529,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_ * Grab our output buffer. */ nl = orig_nl = get_result_buffer(param, param_size, &len); - if (len < needed) { + if (len < needed || len < sizeof(nl->dev)) { param->flags |= DM_BUFFER_FULL_FLAG; goto out; } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 4acf2342f7ad..7291fd3106ff 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -820,24 +820,24 @@ void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) EXPORT_SYMBOL_GPL(dm_table_set_type); /* validate the dax capability of the target device span */ -int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, +int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { int blocksize = *(int *) data, id; bool rc; id = dax_read_lock(); - rc = dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); + rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len); dax_read_unlock(id); return rc; } /* Check devices support synchronous DAX */ -static int device_dax_synchronous(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_not_dax_synchronous_capable(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { - return dev->dax_dev && dax_synchronous(dev->dax_dev); + return !dev->dax_dev || !dax_synchronous(dev->dax_dev); } bool dm_table_supports_dax(struct dm_table *t, @@ -854,7 +854,7 @@ bool dm_table_supports_dax(struct dm_table *t, return false; if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, iterate_fn, blocksize)) + ti->type->iterate_devices(ti, iterate_fn, blocksize)) return false; } @@ -925,7 +925,7 @@ static int dm_table_determine_type(struct dm_table *t) verify_bio_based: /* We must use this table as bio-based */ t->type = DM_TYPE_BIO_BASED; - if (dm_table_supports_dax(t, device_supports_dax, &page_size) || + if (dm_table_supports_dax(t, device_not_dax_capable, &page_size) || (list_empty(devices) && live_md_type == DM_TYPE_DAX_BIO_BASED)) { t->type = DM_TYPE_DAX_BIO_BASED; } @@ -1295,6 +1295,46 @@ struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) return &t->targets[(KEYS_PER_NODE * n) + k]; } +/* + * type->iterate_devices() should be called when the sanity check needs to + * iterate and check all underlying data devices. iterate_devices() will + * iterate all underlying data devices until it encounters a non-zero return + * code, returned by whether the input iterate_devices_callout_fn, or + * iterate_devices() itself internally. + * + * For some target type (e.g. dm-stripe), one call of iterate_devices() may + * iterate multiple underlying devices internally, in which case a non-zero + * return code returned by iterate_devices_callout_fn will stop the iteration + * in advance. + * + * Cases requiring _any_ underlying device supporting some kind of attribute, + * should use the iteration structure like dm_table_any_dev_attr(), or call + * it directly. @func should handle semantics of positive examples, e.g. + * capable of something. + * + * Cases requiring _all_ underlying devices supporting some kind of attribute, + * should use the iteration structure like dm_table_supports_nowait() or + * dm_table_supports_discards(). Or introduce dm_table_all_devs_attr() that + * uses an @anti_func that handle semantics of counter examples, e.g. not + * capable of something. So: return !dm_table_any_dev_attr(t, anti_func, data); + */ +static bool dm_table_any_dev_attr(struct dm_table *t, + iterate_devices_callout_fn func, void *data) +{ + struct dm_target *ti; + unsigned int i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (ti->type->iterate_devices && + ti->type->iterate_devices(ti, func, data)) + return true; + } + + return false; +} + static int count_device(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1331,15 +1371,22 @@ bool dm_table_has_no_data_devices(struct dm_table *table) return true; } -static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct request_queue *q = bdev_get_queue(dev->bdev); enum blk_zoned_model *zoned_model = data; - return q && blk_queue_zoned_model(q) == *zoned_model; + return !q || blk_queue_zoned_model(q) != *zoned_model; } +/* + * Check the device zoned model based on the target feature flag. If the target + * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are + * also accepted but all devices must have the same zoned model. If the target + * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any + * zoned model with all zoned devices having the same zone size. + */ static bool dm_table_supports_zoned_model(struct dm_table *t, enum blk_zoned_model zoned_model) { @@ -1349,44 +1396,37 @@ static bool dm_table_supports_zoned_model(struct dm_table *t, for (i = 0; i < dm_table_get_num_targets(t); i++) { ti = dm_table_get_target(t, i); - if (zoned_model == BLK_ZONED_HM && - !dm_target_supports_zoned_hm(ti->type)) - return false; - - if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model)) - return false; + if (dm_target_supports_zoned_hm(ti->type)) { + if (!ti->type->iterate_devices || + ti->type->iterate_devices(ti, device_not_zoned_model, + &zoned_model)) + return false; + } else if (!dm_target_supports_mixed_zoned_model(ti->type)) { + if (zoned_model == BLK_ZONED_HM) + return false; + } } return true; } -static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct request_queue *q = bdev_get_queue(dev->bdev); unsigned int *zone_sectors = data; - return q && blk_queue_zone_sectors(q) == *zone_sectors; -} - -static bool dm_table_matches_zone_sectors(struct dm_table *t, - unsigned int zone_sectors) -{ - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); - - if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors)) - return false; - } + if (!blk_queue_is_zoned(q)) + return 0; - return true; + return !q || blk_queue_zone_sectors(q) != *zone_sectors; } +/* + * Check consistency of zoned model and zone sectors across all targets. For + * zone sectors, if the destination device is a zoned block device, it shall + * have the specified zone_sectors. + */ static int validate_hardware_zoned_model(struct dm_table *table, enum blk_zoned_model zoned_model, unsigned int zone_sectors) @@ -1404,8 +1444,8 @@ static int validate_hardware_zoned_model(struct dm_table *table, if (!zone_sectors || !is_power_of_2(zone_sectors)) return -EINVAL; - if (!dm_table_matches_zone_sectors(table, zone_sectors)) { - DMERR("%s: zone sectors is not consistent across all devices", + if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) { + DMERR("%s: zone sectors is not consistent across all zoned devices", dm_device_name(table->md)); return -EINVAL; } @@ -1578,29 +1618,12 @@ static int device_dax_write_cache_enabled(struct dm_target *ti, return false; } -static int dm_table_supports_dax_write_cache(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, - device_dax_write_cache_enabled, NULL)) - return true; - } - - return false; -} - -static int device_is_nonrot(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int device_is_rotational(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct request_queue *q = bdev_get_queue(dev->bdev); - return q && blk_queue_nonrot(q); + return q && !blk_queue_nonrot(q); } static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, @@ -1611,23 +1634,6 @@ static int device_is_not_random(struct dm_target *ti, struct dm_dev *dev, return q && !blk_queue_add_random(q); } -static bool dm_table_all_devices_attribute(struct dm_table *t, - iterate_devices_callout_fn func) -{ - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); - - if (!ti->type->iterate_devices || - !ti->type->iterate_devices(ti, func, NULL)) - return false; - } - - return true; -} - static int device_not_write_same_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { @@ -1779,27 +1785,6 @@ static int device_requires_stable_pages(struct dm_target *ti, return q && blk_queue_stable_writes(q); } -/* - * If any underlying device requires stable pages, a table must require - * them as well. Only targets that support iterate_devices are considered: - * don't want error, zero, etc to require stable pages. - */ -static bool dm_table_requires_stable_pages(struct dm_table *t) -{ - struct dm_target *ti; - unsigned i; - - for (i = 0; i < dm_table_get_num_targets(t); i++) { - ti = dm_table_get_target(t, i); - - if (ti->type->iterate_devices && - ti->type->iterate_devices(ti, device_requires_stable_pages, NULL)) - return true; - } - - return false; -} - void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, struct queue_limits *limits) { @@ -1837,22 +1822,22 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, } blk_queue_write_cache(q, wc, fua); - if (dm_table_supports_dax(t, device_supports_dax, &page_size)) { + if (dm_table_supports_dax(t, device_not_dax_capable, &page_size)) { blk_queue_flag_set(QUEUE_FLAG_DAX, q); - if (dm_table_supports_dax(t, device_dax_synchronous, NULL)) + if (dm_table_supports_dax(t, device_not_dax_synchronous_capable, NULL)) set_dax_synchronous(t->md->dax_dev); } else blk_queue_flag_clear(QUEUE_FLAG_DAX, q); - if (dm_table_supports_dax_write_cache(t)) + if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL)) dax_write_cache(t->md->dax_dev, true); /* Ensure that all underlying devices are non-rotational. */ - if (dm_table_all_devices_attribute(t, device_is_nonrot)) - blk_queue_flag_set(QUEUE_FLAG_NONROT, q); - else + if (dm_table_any_dev_attr(t, device_is_rotational, NULL)) blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); + else + blk_queue_flag_set(QUEUE_FLAG_NONROT, q); if (!dm_table_supports_write_same(t)) q->limits.max_write_same_sectors = 0; @@ -1864,8 +1849,11 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, /* * Some devices don't use blk_integrity but still want stable pages * because they do their own checksumming. + * If any underlying device requires stable pages, a table must require + * them as well. Only targets that support iterate_devices are considered: + * don't want error, zero, etc to require stable pages. */ - if (dm_table_requires_stable_pages(t)) + if (dm_table_any_dev_attr(t, device_requires_stable_pages, NULL)) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, q); else blk_queue_flag_clear(QUEUE_FLAG_STABLE_WRITES, q); @@ -1876,7 +1864,8 @@ void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q, * Clear QUEUE_FLAG_ADD_RANDOM if any underlying device does not * have it set. */ - if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random)) + if (blk_queue_add_random(q) && + dm_table_any_dev_attr(t, device_is_not_random, NULL)) blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); /* diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c index fb41b4f23c48..66f4c6398f67 100644 --- a/drivers/md/dm-verity-fec.c +++ b/drivers/md/dm-verity-fec.c @@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio, static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index, unsigned *offset, struct dm_buffer **buf) { - u64 position, block; + u64 position, block, rem; u8 *res; position = (index + rsb) * v->fec->roots; - block = position >> v->data_dev_block_bits; - *offset = (unsigned)(position - (block << v->data_dev_block_bits)); + block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem); + *offset = (unsigned)rem; - res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf); + res = dm_bufio_read(v->fec->bufio, block, buf); if (IS_ERR(res)) { DMERR("%s: FEC %llu: parity read failed (block %llu): %ld", v->data_dev->name, (unsigned long long)rsb, - (unsigned long long)(v->fec->start + block), - PTR_ERR(res)); + (unsigned long long)block, PTR_ERR(res)); *buf = NULL; } @@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio, /* read the next block when we run out of parity bytes */ offset += v->fec->roots; - if (offset >= 1 << v->data_dev_block_bits) { + if (offset >= v->fec->roots << SECTOR_SHIFT) { dm_bufio_release(buf); par = fec_read_parity(v, rsb, block_offset, &offset, &buf); @@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v) { struct dm_verity_fec *f = v->fec; struct dm_target *ti = v->ti; - u64 hash_blocks; + u64 hash_blocks, fec_blocks; int ret; if (!verity_fec_is_enabled(v)) { @@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v) } f->bufio = dm_bufio_client_create(f->dev->bdev, - 1 << v->data_dev_block_bits, + f->roots << SECTOR_SHIFT, 1, 0, NULL, NULL); if (IS_ERR(f->bufio)) { ti->error = "Cannot initialize FEC bufio client"; return PTR_ERR(f->bufio); } - if (dm_bufio_get_device_size(f->bufio) < - ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) { + dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT)); + + fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT); + if (dm_bufio_get_device_size(f->bufio) < fec_blocks) { ti->error = "FEC device is too small"; return -E2BIG; } diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index 6b8e5bdd8526..808a98ef624c 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -34,7 +34,7 @@ #define DM_VERITY_OPT_IGN_ZEROES "ignore_zero_blocks" #define DM_VERITY_OPT_AT_MOST_ONCE "check_at_most_once" -#define DM_VERITY_OPTS_MAX (2 + DM_VERITY_OPTS_FEC + \ +#define DM_VERITY_OPTS_MAX (3 + DM_VERITY_OPTS_FEC + \ DM_VERITY_ROOT_HASH_VERIFICATION_OPTS) static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE; diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index d5223a0e5cc5..8628c4aa2e85 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -148,6 +148,7 @@ struct dm_writecache { size_t metadata_sectors; size_t n_blocks; uint64_t seq_count; + sector_t data_device_sectors; void *block_start; struct wc_entry *entries; unsigned block_size; @@ -159,14 +160,22 @@ struct dm_writecache { bool overwrote_committed:1; bool memory_vmapped:1; + bool start_sector_set:1; bool high_wm_percent_set:1; bool low_wm_percent_set:1; bool max_writeback_jobs_set:1; bool autocommit_blocks_set:1; bool autocommit_time_set:1; + bool max_age_set:1; bool writeback_fua_set:1; bool flush_on_suspend:1; bool cleaner:1; + bool cleaner_set:1; + + unsigned high_wm_percent_value; + unsigned low_wm_percent_value; + unsigned autocommit_time_value; + unsigned max_age_value; unsigned writeback_all; struct workqueue_struct *writeback_wq; @@ -523,7 +532,7 @@ static void ssd_commit_superblock(struct dm_writecache *wc) region.bdev = wc->ssd_dev->bdev; region.sector = 0; - region.count = PAGE_SIZE; + region.count = PAGE_SIZE >> SECTOR_SHIFT; if (unlikely(region.sector + region.count > wc->metadata_sectors)) region.count = wc->metadata_sectors - region.sector; @@ -969,6 +978,8 @@ static void writecache_resume(struct dm_target *ti) wc_lock(wc); + wc->data_device_sectors = i_size_read(wc->dev->bdev->bd_inode) >> SECTOR_SHIFT; + if (WC_MODE_PMEM(wc)) { persistent_memory_invalidate_cache(wc->memory_map, wc->memory_map_size); } else { @@ -1638,6 +1649,10 @@ static bool wc_add_block(struct writeback_struct *wb, struct wc_entry *e, gfp_t void *address = memory_data(wc, e); persistent_memory_flush_cache(address, block_size); + + if (unlikely(bio_end_sector(&wb->bio) >= wc->data_device_sectors)) + return true; + return bio_add_page(&wb->bio, persistent_memory_page(address), block_size, persistent_memory_page_offset(address)) != 0; } @@ -1709,6 +1724,9 @@ static void __writecache_writeback_pmem(struct dm_writecache *wc, struct writeba if (writecache_has_error(wc)) { bio->bi_status = BLK_STS_IOERR; bio_endio(bio); + } else if (unlikely(!bio_sectors(bio))) { + bio->bi_status = BLK_STS_OK; + bio_endio(bio); } else { submit_bio(bio); } @@ -1752,6 +1770,14 @@ static void __writecache_writeback_ssd(struct dm_writecache *wc, struct writebac e = f; } + if (unlikely(to.sector + to.count > wc->data_device_sectors)) { + if (to.sector >= wc->data_device_sectors) { + writecache_copy_endio(0, 0, c); + continue; + } + from.count = to.count = wc->data_device_sectors - to.sector; + } + dm_kcopyd_copy(wc->dm_kcopyd, &from, 1, &to, 0, writecache_copy_endio, c); __writeback_throttle(wc, wbl); @@ -2205,6 +2231,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) if (sscanf(string, "%llu%c", &start_sector, &dummy) != 1) goto invalid_optional; wc->start_sector = start_sector; + wc->start_sector_set = true; if (wc->start_sector != start_sector || wc->start_sector >= wc->memory_map_size >> SECTOR_SHIFT) goto invalid_optional; @@ -2214,6 +2241,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto invalid_optional; if (high_wm_percent < 0 || high_wm_percent > 100) goto invalid_optional; + wc->high_wm_percent_value = high_wm_percent; wc->high_wm_percent_set = true; } else if (!strcasecmp(string, "low_watermark") && opt_params >= 1) { string = dm_shift_arg(&as), opt_params--; @@ -2221,6 +2249,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) goto invalid_optional; if (low_wm_percent < 0 || low_wm_percent > 100) goto invalid_optional; + wc->low_wm_percent_value = low_wm_percent; wc->low_wm_percent_set = true; } else if (!strcasecmp(string, "writeback_jobs") && opt_params >= 1) { string = dm_shift_arg(&as), opt_params--; @@ -2240,6 +2269,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) if (autocommit_msecs > 3600000) goto invalid_optional; wc->autocommit_jiffies = msecs_to_jiffies(autocommit_msecs); + wc->autocommit_time_value = autocommit_msecs; wc->autocommit_time_set = true; } else if (!strcasecmp(string, "max_age") && opt_params >= 1) { unsigned max_age_msecs; @@ -2249,7 +2279,10 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv) if (max_age_msecs > 86400000) goto invalid_optional; wc->max_age = msecs_to_jiffies(max_age_msecs); + wc->max_age_set = true; + wc->max_age_value = max_age_msecs; } else if (!strcasecmp(string, "cleaner")) { + wc->cleaner_set = true; wc->cleaner = true; } else if (!strcasecmp(string, "fua")) { if (WC_MODE_PMEM(wc)) { @@ -2455,7 +2488,6 @@ static void writecache_status(struct dm_target *ti, status_type_t type, struct dm_writecache *wc = ti->private; unsigned extra_args; unsigned sz = 0; - uint64_t x; switch (type) { case STATUSTYPE_INFO: @@ -2467,11 +2499,11 @@ static void writecache_status(struct dm_target *ti, status_type_t type, DMEMIT("%c %s %s %u ", WC_MODE_PMEM(wc) ? 'p' : 's', wc->dev->name, wc->ssd_dev->name, wc->block_size); extra_args = 0; - if (wc->start_sector) + if (wc->start_sector_set) extra_args += 2; - if (wc->high_wm_percent_set && !wc->cleaner) + if (wc->high_wm_percent_set) extra_args += 2; - if (wc->low_wm_percent_set && !wc->cleaner) + if (wc->low_wm_percent_set) extra_args += 2; if (wc->max_writeback_jobs_set) extra_args += 2; @@ -2479,37 +2511,29 @@ static void writecache_status(struct dm_target *ti, status_type_t type, extra_args += 2; if (wc->autocommit_time_set) extra_args += 2; - if (wc->max_age != MAX_AGE_UNSPECIFIED) + if (wc->max_age_set) extra_args += 2; - if (wc->cleaner) + if (wc->cleaner_set) extra_args++; if (wc->writeback_fua_set) extra_args++; DMEMIT("%u", extra_args); - if (wc->start_sector) + if (wc->start_sector_set) DMEMIT(" start_sector %llu", (unsigned long long)wc->start_sector); - if (wc->high_wm_percent_set && !wc->cleaner) { - x = (uint64_t)wc->freelist_high_watermark * 100; - x += wc->n_blocks / 2; - do_div(x, (size_t)wc->n_blocks); - DMEMIT(" high_watermark %u", 100 - (unsigned)x); - } - if (wc->low_wm_percent_set && !wc->cleaner) { - x = (uint64_t)wc->freelist_low_watermark * 100; - x += wc->n_blocks / 2; - do_div(x, (size_t)wc->n_blocks); - DMEMIT(" low_watermark %u", 100 - (unsigned)x); - } + if (wc->high_wm_percent_set) + DMEMIT(" high_watermark %u", wc->high_wm_percent_value); + if (wc->low_wm_percent_set) + DMEMIT(" low_watermark %u", wc->low_wm_percent_value); if (wc->max_writeback_jobs_set) DMEMIT(" writeback_jobs %u", wc->max_writeback_jobs); if (wc->autocommit_blocks_set) DMEMIT(" autocommit_blocks %u", wc->autocommit_blocks); if (wc->autocommit_time_set) - DMEMIT(" autocommit_time %u", jiffies_to_msecs(wc->autocommit_jiffies)); - if (wc->max_age != MAX_AGE_UNSPECIFIED) - DMEMIT(" max_age %u", jiffies_to_msecs(wc->max_age)); - if (wc->cleaner) + DMEMIT(" autocommit_time %u", wc->autocommit_time_value); + if (wc->max_age_set) + DMEMIT(" max_age %u", wc->max_age_value); + if (wc->cleaner_set) DMEMIT(" cleaner"); if (wc->writeback_fua_set) DMEMIT(" %sfua", wc->writeback_fua ? "" : "no"); @@ -2519,7 +2543,7 @@ static void writecache_status(struct dm_target *ti, status_type_t type, static struct target_type writecache_target = { .name = "writecache", - .version = {1, 3, 0}, + .version = {1, 4, 0}, .module = THIS_MODULE, .ctr = writecache_ctr, .dtr = writecache_dtr, diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index 697f9de37355..7e88df64d197 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -1143,7 +1143,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv, static struct target_type dmz_type = { .name = "zoned", .version = {2, 0, 0}, - .features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM, + .features = DM_TARGET_SINGLETON | DM_TARGET_MIXED_ZONED_MODEL, .module = THIS_MODULE, .ctr = dmz_ctr, .dtr = dmz_dtr, diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7bac564f3faa..09542eabf725 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -148,6 +148,16 @@ EXPORT_SYMBOL_GPL(dm_bio_get_target_bio_nr); #define DM_NUMA_NODE NUMA_NO_NODE static int dm_numa_node = DM_NUMA_NODE; +#define DEFAULT_SWAP_BIOS (8 * 1048576 / PAGE_SIZE) +static int swap_bios = DEFAULT_SWAP_BIOS; +static int get_swap_bios(void) +{ + int latch = READ_ONCE(swap_bios); + if (unlikely(latch <= 0)) + latch = DEFAULT_SWAP_BIOS; + return latch; +} + /* * For mempools pre-allocation at the table loading time. */ @@ -969,6 +979,11 @@ void disable_write_zeroes(struct mapped_device *md) limits->max_write_zeroes_sectors = 0; } +static bool swap_bios_limit(struct dm_target *ti, struct bio *bio) +{ + return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios); +} + static void clone_endio(struct bio *bio) { blk_status_t error = bio->bi_status; @@ -1019,6 +1034,11 @@ static void clone_endio(struct bio *bio) } } + if (unlikely(swap_bios_limit(tio->ti, bio))) { + struct mapped_device *md = io->md; + up(&md->swap_bios_semaphore); + } + free_tio(tio); dec_pending(io, error); } @@ -1128,7 +1148,7 @@ static bool dm_dax_supported(struct dax_device *dax_dev, struct block_device *bd if (!map) goto out; - ret = dm_table_supports_dax(map, device_supports_dax, &blocksize); + ret = dm_table_supports_dax(map, device_not_dax_capable, &blocksize); out: dm_put_live_table(md, srcu_idx); @@ -1252,6 +1272,22 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch) +{ + mutex_lock(&md->swap_bios_lock); + while (latch < md->swap_bios) { + cond_resched(); + down(&md->swap_bios_semaphore); + md->swap_bios--; + } + while (latch > md->swap_bios) { + cond_resched(); + up(&md->swap_bios_semaphore); + md->swap_bios++; + } + mutex_unlock(&md->swap_bios_lock); +} + static blk_qc_t __map_bio(struct dm_target_io *tio) { int r; @@ -1271,6 +1307,14 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) atomic_inc(&io->io_count); sector = clone->bi_iter.bi_sector; + if (unlikely(swap_bios_limit(ti, clone))) { + struct mapped_device *md = io->md; + int latch = get_swap_bios(); + if (unlikely(latch != md->swap_bios)) + __set_swap_bios_limit(md, latch); + down(&md->swap_bios_semaphore); + } + r = ti->type->map(ti, clone); switch (r) { case DM_MAPIO_SUBMITTED: @@ -1281,10 +1325,18 @@ static blk_qc_t __map_bio(struct dm_target_io *tio) ret = submit_bio_noacct(clone); break; case DM_MAPIO_KILL: + if (unlikely(swap_bios_limit(ti, clone))) { + struct mapped_device *md = io->md; + up(&md->swap_bios_semaphore); + } free_tio(tio); dec_pending(io, BLK_STS_IOERR); break; case DM_MAPIO_REQUEUE: + if (unlikely(swap_bios_limit(ti, clone))) { + struct mapped_device *md = io->md; + up(&md->swap_bios_semaphore); + } free_tio(tio); dec_pending(io, BLK_STS_DM_REQUEUE); break; @@ -1747,6 +1799,7 @@ static void cleanup_mapped_device(struct mapped_device *md) mutex_destroy(&md->suspend_lock); mutex_destroy(&md->type_lock); mutex_destroy(&md->table_devices_lock); + mutex_destroy(&md->swap_bios_lock); dm_mq_cleanup_mapped_device(md); } @@ -1814,6 +1867,10 @@ static struct mapped_device *alloc_dev(int minor) init_waitqueue_head(&md->eventq); init_completion(&md->kobj_holder.completion); + md->swap_bios = get_swap_bios(); + sema_init(&md->swap_bios_semaphore, md->swap_bios); + mutex_init(&md->swap_bios_lock); + md->disk->major = _major; md->disk->first_minor = minor; md->disk->fops = &dm_blk_dops; @@ -1959,7 +2016,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t, if (size != dm_get_size(md)) memset(&md->geometry, 0, sizeof(md->geometry)); - set_capacity_and_notify(md->disk, size); + if (!get_capacity(md->disk)) + set_capacity(md->disk, size); + else + set_capacity_and_notify(md->disk, size); dm_table_event_callback(t, event_callback, md); @@ -3097,6 +3157,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools"); module_param(dm_numa_node, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations"); +module_param(swap_bios, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(swap_bios, "Maximum allowed inflight swap IOs"); + MODULE_DESCRIPTION(DM_NAME " driver"); MODULE_AUTHOR("Joe Thornber "); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index fffe1e289c53..b441ad772c18 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -73,7 +73,7 @@ void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); bool dm_table_supports_dax(struct dm_table *t, iterate_devices_callout_fn fn, int *blocksize); -int device_supports_dax(struct dm_target *ti, struct dm_dev *dev, +int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data); void dm_lock_md_type(struct mapped_device *md); diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig index 2b9d81e4794a..6eed3209ee2d 100644 --- a/drivers/media/i2c/Kconfig +++ b/drivers/media/i2c/Kconfig @@ -1000,6 +1000,7 @@ config VIDEO_OV772X tristate "OmniVision OV772x sensor support" depends on I2C && VIDEO_V4L2 select REGMAP_SCCB + select V4L2_FWNODE help This is a Video4Linux2 sensor driver for the OmniVision OV772x camera. diff --git a/drivers/media/i2c/max9286.c b/drivers/media/i2c/max9286.c index c82c1493e099..b1e2476d3c9e 100644 --- a/drivers/media/i2c/max9286.c +++ b/drivers/media/i2c/max9286.c @@ -580,7 +580,7 @@ static int max9286_v4l2_notifier_register(struct max9286_priv *priv) asd = v4l2_async_notifier_add_fwnode_subdev(&priv->notifier, source->fwnode, - sizeof(*asd)); + sizeof(struct max9286_asd)); if (IS_ERR(asd)) { dev_err(dev, "Failed to add subdev for source %u: %ld", i, PTR_ERR(asd)); diff --git a/drivers/media/i2c/ov5670.c b/drivers/media/i2c/ov5670.c index 148fd4e05029..866c8c2e8f59 100644 --- a/drivers/media/i2c/ov5670.c +++ b/drivers/media/i2c/ov5670.c @@ -2084,7 +2084,8 @@ static int ov5670_init_controls(struct ov5670 *ov5670) /* By default, V4L2_CID_PIXEL_RATE is read only */ ov5670->pixel_rate = v4l2_ctrl_new_std(ctrl_hdlr, &ov5670_ctrl_ops, - V4L2_CID_PIXEL_RATE, 0, + V4L2_CID_PIXEL_RATE, + link_freq_configs[0].pixel_rate, link_freq_configs[0].pixel_rate, 1, link_freq_configs[0].pixel_rate); diff --git a/drivers/media/pci/cx25821/cx25821-core.c b/drivers/media/pci/cx25821/cx25821-core.c index 6f8ffab8840f..07b6d0c49bbf 100644 --- a/drivers/media/pci/cx25821/cx25821-core.c +++ b/drivers/media/pci/cx25821/cx25821-core.c @@ -976,8 +976,10 @@ int cx25821_riscmem_alloc(struct pci_dev *pci, __le32 *cpu; dma_addr_t dma = 0; - if (NULL != risc->cpu && risc->size < size) + if (risc->cpu && risc->size < size) { pci_free_consistent(pci, risc->size, risc->cpu, risc->dma); + risc->cpu = NULL; + } if (NULL == risc->cpu) { cpu = pci_zalloc_consistent(pci, size, &dma); if (NULL == cpu) diff --git a/drivers/media/pci/intel/ipu3/Kconfig b/drivers/media/pci/intel/ipu3/Kconfig index 82d7f17e6a02..7a805201034b 100644 --- a/drivers/media/pci/intel/ipu3/Kconfig +++ b/drivers/media/pci/intel/ipu3/Kconfig @@ -2,7 +2,8 @@ config VIDEO_IPU3_CIO2 tristate "Intel ipu3-cio2 driver" depends on VIDEO_V4L2 && PCI - depends on (X86 && ACPI) || COMPILE_TEST + depends on ACPI || COMPILE_TEST + depends on X86 select MEDIA_CONTROLLER select VIDEO_V4L2_SUBDEV_API select V4L2_FWNODE diff --git a/drivers/media/pci/intel/ipu3/ipu3-cio2.c b/drivers/media/pci/intel/ipu3/ipu3-cio2.c index 6cada8a6e50c..143ba9d90342 100644 --- a/drivers/media/pci/intel/ipu3/ipu3-cio2.c +++ b/drivers/media/pci/intel/ipu3/ipu3-cio2.c @@ -1269,7 +1269,7 @@ static int cio2_subdev_set_fmt(struct v4l2_subdev *sd, fmt->format.code = formats[0].mbus_code; for (i = 0; i < ARRAY_SIZE(formats); i++) { - if (formats[i].mbus_code == fmt->format.code) { + if (formats[i].mbus_code == mbus_code) { fmt->format.code = mbus_code; break; } diff --git a/drivers/media/pci/saa7134/saa7134-empress.c b/drivers/media/pci/saa7134/saa7134-empress.c index 39e3c7f8c5b4..76a37fbd8458 100644 --- a/drivers/media/pci/saa7134/saa7134-empress.c +++ b/drivers/media/pci/saa7134/saa7134-empress.c @@ -282,8 +282,11 @@ static int empress_init(struct saa7134_dev *dev) q->lock = &dev->lock; q->dev = &dev->pci->dev; err = vb2_queue_init(q); - if (err) + if (err) { + video_device_release(dev->empress_dev); + dev->empress_dev = NULL; return err; + } dev->empress_dev->queue = q; dev->empress_dev->device_caps = V4L2_CAP_READWRITE | V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_CAPTURE; diff --git a/drivers/media/pci/smipcie/smipcie-ir.c b/drivers/media/pci/smipcie/smipcie-ir.c index e6b74e161a05..c0604d9c7011 100644 --- a/drivers/media/pci/smipcie/smipcie-ir.c +++ b/drivers/media/pci/smipcie/smipcie-ir.c @@ -60,38 +60,44 @@ static void smi_ir_decode(struct smi_rc *ir) { struct smi_dev *dev = ir->dev; struct rc_dev *rc_dev = ir->rc_dev; - u32 dwIRControl, dwIRData; - u8 index, ucIRCount, readLoop; + u32 control, data; + u8 index, ir_count, read_loop; - dwIRControl = smi_read(IR_Init_Reg); + control = smi_read(IR_Init_Reg); - if (dwIRControl & rbIRVld) { - ucIRCount = (u8) smi_read(IR_Data_Cnt); + dev_dbg(&rc_dev->dev, "ircontrol: 0x%08x\n", control); - readLoop = ucIRCount/4; - if (ucIRCount % 4) - readLoop += 1; - for (index = 0; index < readLoop; index++) { - dwIRData = smi_read(IR_DATA_BUFFER_BASE + (index * 4)); + if (control & rbIRVld) { + ir_count = (u8)smi_read(IR_Data_Cnt); - ir->irData[index*4 + 0] = (u8)(dwIRData); - ir->irData[index*4 + 1] = (u8)(dwIRData >> 8); - ir->irData[index*4 + 2] = (u8)(dwIRData >> 16); - ir->irData[index*4 + 3] = (u8)(dwIRData >> 24); + dev_dbg(&rc_dev->dev, "ircount %d\n", ir_count); + + read_loop = ir_count / 4; + if (ir_count % 4) + read_loop += 1; + for (index = 0; index < read_loop; index++) { + data = smi_read(IR_DATA_BUFFER_BASE + (index * 4)); + dev_dbg(&rc_dev->dev, "IRData 0x%08x\n", data); + + ir->irData[index * 4 + 0] = (u8)(data); + ir->irData[index * 4 + 1] = (u8)(data >> 8); + ir->irData[index * 4 + 2] = (u8)(data >> 16); + ir->irData[index * 4 + 3] = (u8)(data >> 24); } - smi_raw_process(rc_dev, ir->irData, ucIRCount); - smi_set(IR_Init_Reg, rbIRVld); + smi_raw_process(rc_dev, ir->irData, ir_count); } - if (dwIRControl & rbIRhighidle) { + if (control & rbIRhighidle) { struct ir_raw_event rawir = {}; + dev_dbg(&rc_dev->dev, "high idle\n"); + rawir.pulse = 0; rawir.duration = SMI_SAMPLE_PERIOD * SMI_SAMPLE_IDLEMIN; ir_raw_event_store_with_filter(rc_dev, &rawir); - smi_set(IR_Init_Reg, rbIRhighidle); } + smi_set(IR_Init_Reg, rbIRVld); ir_raw_event_handle(rc_dev); } @@ -150,7 +156,7 @@ int smi_ir_init(struct smi_dev *dev) rc_dev->dev.parent = &dev->pci_dev->dev; rc_dev->map_name = dev->info->rc_map; - rc_dev->timeout = MS_TO_US(100); + rc_dev->timeout = SMI_SAMPLE_PERIOD * SMI_SAMPLE_IDLEMIN; rc_dev->rx_resolution = SMI_SAMPLE_PERIOD; ir->rc_dev = rc_dev; @@ -173,7 +179,7 @@ void smi_ir_exit(struct smi_dev *dev) struct smi_rc *ir = &dev->ir; struct rc_dev *rc_dev = ir->rc_dev; - smi_ir_stop(ir); rc_unregister_device(rc_dev); + smi_ir_stop(ir); ir->rc_dev = NULL; } diff --git a/drivers/media/platform/aspeed-video.c b/drivers/media/platform/aspeed-video.c index c46a79eace98..f2c4dadd6a0e 100644 --- a/drivers/media/platform/aspeed-video.c +++ b/drivers/media/platform/aspeed-video.c @@ -1551,12 +1551,12 @@ static int aspeed_video_setup_video(struct aspeed_video *video) V4L2_JPEG_CHROMA_SUBSAMPLING_420, mask, V4L2_JPEG_CHROMA_SUBSAMPLING_444); - if (video->ctrl_handler.error) { + rc = video->ctrl_handler.error; + if (rc) { v4l2_ctrl_handler_free(&video->ctrl_handler); v4l2_device_unregister(v4l2_dev); - dev_err(video->dev, "Failed to init controls: %d\n", - video->ctrl_handler.error); + dev_err(video->dev, "Failed to init controls: %d\n", rc); return rc; } diff --git a/drivers/media/platform/marvell-ccic/mcam-core.c b/drivers/media/platform/marvell-ccic/mcam-core.c index c012fd2e1d29..34266fba824f 100644 --- a/drivers/media/platform/marvell-ccic/mcam-core.c +++ b/drivers/media/platform/marvell-ccic/mcam-core.c @@ -931,6 +931,7 @@ static int mclk_enable(struct clk_hw *hw) mclk_div = 2; } + pm_runtime_get_sync(cam->dev); clk_enable(cam->clk[0]); mcam_reg_write(cam, REG_CLKCTRL, (mclk_src << 29) | mclk_div); mcam_ctlr_power_up(cam); @@ -944,6 +945,7 @@ static void mclk_disable(struct clk_hw *hw) mcam_ctlr_power_down(cam); clk_disable(cam->clk[0]); + pm_runtime_put(cam->dev); } static unsigned long mclk_recalc_rate(struct clk_hw *hw, diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c index dfb42e19bf81..be3842e6ca47 100644 --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_enc_drv.c @@ -303,7 +303,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev) ret = PTR_ERR((__force void *)dev->reg_base[VENC_SYS]); goto err_res; } - mtk_v4l2_debug(2, "reg[%d] base=0x%p", i, dev->reg_base[VENC_SYS]); + mtk_v4l2_debug(2, "reg[%d] base=0x%p", VENC_SYS, dev->reg_base[VENC_SYS]); res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); if (res == NULL) { @@ -332,7 +332,7 @@ static int mtk_vcodec_probe(struct platform_device *pdev) ret = PTR_ERR((__force void *)dev->reg_base[VENC_LT_SYS]); goto err_res; } - mtk_v4l2_debug(2, "reg[%d] base=0x%p", i, dev->reg_base[VENC_LT_SYS]); + mtk_v4l2_debug(2, "reg[%d] base=0x%p", VENC_LT_SYS, dev->reg_base[VENC_LT_SYS]); dev->enc_lt_irq = platform_get_irq(pdev, 1); irq_set_status_flags(dev->enc_lt_irq, IRQ_NOAUTOEN); diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c index 5ea153a68522..d9880210b2ab 100644 --- a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_if.c @@ -890,7 +890,8 @@ static int vdec_vp9_decode(void *h_vdec, struct mtk_vcodec_mem *bs, memset(inst->seg_id_buf.va, 0, inst->seg_id_buf.size); if (vsi->show_frame & BIT(2)) { - if (vpu_dec_start(&inst->vpu, NULL, 0)) { + ret = vpu_dec_start(&inst->vpu, NULL, 0); + if (ret) { mtk_vcodec_err(inst, "vpu trig decoder failed"); goto DECODE_ERROR; } diff --git a/drivers/media/platform/pxa_camera.c b/drivers/media/platform/pxa_camera.c index b664ce7558a1..75fad9689c90 100644 --- a/drivers/media/platform/pxa_camera.c +++ b/drivers/media/platform/pxa_camera.c @@ -1386,6 +1386,9 @@ static int pxac_vb2_prepare(struct vb2_buffer *vb) struct pxa_camera_dev *pcdev = vb2_get_drv_priv(vb->vb2_queue); struct pxa_buffer *buf = vb2_to_pxa_buffer(vb); int ret = 0; +#ifdef DEBUG + int i; +#endif switch (pcdev->channels) { case 1: diff --git a/drivers/media/platform/qcom/camss/camss-video.c b/drivers/media/platform/qcom/camss/camss-video.c index bd9334af1c73..97cea7c4d769 100644 --- a/drivers/media/platform/qcom/camss/camss-video.c +++ b/drivers/media/platform/qcom/camss/camss-video.c @@ -579,7 +579,7 @@ static int video_enum_fmt(struct file *file, void *fh, struct v4l2_fmtdesc *f) break; } - if (k < f->index) + if (k == -1 || k < f->index) /* * All the unique pixel formats matching the arguments * have been enumerated (k >= 0 and f->index > 0), or @@ -961,6 +961,7 @@ int msm_video_register(struct camss_video *video, struct v4l2_device *v4l2_dev, video->nformats = ARRAY_SIZE(formats_rdi_8x96); } } else { + ret = -EINVAL; goto error_video_register; } diff --git a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c index aa5f45749543..a60c302ef267 100644 --- a/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c +++ b/drivers/media/platform/rockchip/rkisp1/rkisp1-params.c @@ -1288,7 +1288,6 @@ static void rkisp1_params_config_parameter(struct rkisp1_params *params) memset(hst.hist_weight, 0x01, sizeof(hst.hist_weight)); rkisp1_hst_config(params, &hst); rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP, - ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK | rkisp1_hst_params_default_config.mode); /* set the range */ diff --git a/drivers/media/platform/ti-vpe/cal.c b/drivers/media/platform/ti-vpe/cal.c index 59a0266b1f39..2eef245c31a1 100644 --- a/drivers/media/platform/ti-vpe/cal.c +++ b/drivers/media/platform/ti-vpe/cal.c @@ -406,7 +406,7 @@ static irqreturn_t cal_irq(int irq_cal, void *data) */ struct cal_v4l2_async_subdev { - struct v4l2_async_subdev asd; + struct v4l2_async_subdev asd; /* Must be first */ struct cal_camerarx *phy; }; @@ -472,7 +472,7 @@ static int cal_async_notifier_register(struct cal_dev *cal) fwnode = of_fwnode_handle(phy->sensor_node); asd = v4l2_async_notifier_add_fwnode_subdev(&cal->notifier, fwnode, - sizeof(*asd)); + sizeof(*casd)); if (IS_ERR(asd)) { phy_err(phy, "Failed to add subdev to notifier\n"); ret = PTR_ERR(asd); diff --git a/drivers/media/platform/vsp1/vsp1_drm.c b/drivers/media/platform/vsp1/vsp1_drm.c index 86d5e3f4b1ff..06f74d410973 100644 --- a/drivers/media/platform/vsp1/vsp1_drm.c +++ b/drivers/media/platform/vsp1/vsp1_drm.c @@ -245,7 +245,7 @@ static int vsp1_du_pipeline_setup_brx(struct vsp1_device *vsp1, brx = &vsp1->bru->entity; else if (pipe->brx && !drm_pipe->force_brx_release) brx = pipe->brx; - else if (!vsp1->bru->entity.pipe) + else if (vsp1_feature(vsp1, VSP1_HAS_BRU) && !vsp1->bru->entity.pipe) brx = &vsp1->bru->entity; else brx = &vsp1->brs->entity; @@ -462,9 +462,9 @@ static int vsp1_du_pipeline_setup_inputs(struct vsp1_device *vsp1, * make sure it is present in the pipeline's list of entities if it * wasn't already. */ - if (!use_uif) { + if (drm_pipe->uif && !use_uif) { drm_pipe->uif->pipe = NULL; - } else if (!drm_pipe->uif->pipe) { + } else if (drm_pipe->uif && !drm_pipe->uif->pipe) { drm_pipe->uif->pipe = pipe; list_add_tail(&drm_pipe->uif->list_pipe, &pipe->entities); } diff --git a/drivers/media/platform/vsp1/vsp1_drv.c b/drivers/media/platform/vsp1/vsp1_drv.c index dc62533cf32c..aa66e4f5f3f3 100644 --- a/drivers/media/platform/vsp1/vsp1_drv.c +++ b/drivers/media/platform/vsp1/vsp1_drv.c @@ -882,8 +882,10 @@ static int vsp1_probe(struct platform_device *pdev) } done: - if (ret) + if (ret) { pm_runtime_disable(&pdev->dev); + rcar_fcp_put(vsp1->fcp); + } return ret; } diff --git a/drivers/media/rc/Makefile b/drivers/media/rc/Makefile index 5bb2932ab119..ff6a8fc4c38e 100644 --- a/drivers/media/rc/Makefile +++ b/drivers/media/rc/Makefile @@ -5,6 +5,7 @@ obj-y += keymaps/ obj-$(CONFIG_RC_CORE) += rc-core.o rc-core-y := rc-main.o rc-ir-raw.o rc-core-$(CONFIG_LIRC) += lirc_dev.o +rc-core-$(CONFIG_MEDIA_CEC_RC) += keymaps/rc-cec.o rc-core-$(CONFIG_BPF_LIRC_MODE2) += bpf-lirc.o obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o diff --git a/drivers/media/rc/ir_toy.c b/drivers/media/rc/ir_toy.c index e0242c9b6aeb..3e729a17b35f 100644 --- a/drivers/media/rc/ir_toy.c +++ b/drivers/media/rc/ir_toy.c @@ -491,6 +491,7 @@ static void irtoy_disconnect(struct usb_interface *intf) static const struct usb_device_id irtoy_table[] = { { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xfd08, USB_CLASS_CDC_DATA) }, + { USB_DEVICE_INTERFACE_CLASS(0x04d8, 0xf58b, USB_CLASS_CDC_DATA) }, { } }; diff --git a/drivers/media/rc/keymaps/Makefile b/drivers/media/rc/keymaps/Makefile index b252a1d2ebd6..cc6662e1903f 100644 --- a/drivers/media/rc/keymaps/Makefile +++ b/drivers/media/rc/keymaps/Makefile @@ -21,7 +21,6 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \ rc-behold.o \ rc-behold-columbus.o \ rc-budget-ci-old.o \ - rc-cec.o \ rc-cinergy-1400.o \ rc-cinergy.o \ rc-d680-dmb.o \ diff --git a/drivers/media/rc/keymaps/rc-cec.c b/drivers/media/rc/keymaps/rc-cec.c index 3e3bd11092b4..068e22aeac8c 100644 --- a/drivers/media/rc/keymaps/rc-cec.c +++ b/drivers/media/rc/keymaps/rc-cec.c @@ -1,5 +1,15 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* Keytable for the CEC remote control + * + * This keymap is unusual in that it can't be built as a module, + * instead it is registered directly in rc-main.c if CONFIG_MEDIA_CEC_RC + * is set. This is because it can be called from drm_dp_cec_set_edid() via + * cec_register_adapter() in an asynchronous context, and it is not + * allowed to use request_module() to load rc-cec.ko in that case. + * + * Since this keymap is only used if CONFIG_MEDIA_CEC_RC is set, we + * just compile this keymap into the rc-core module and never as a + * separate module. * * Copyright (c) 2015 by Kamil Debski */ @@ -152,7 +162,7 @@ static struct rc_map_table cec[] = { /* 0x77-0xff: Reserved */ }; -static struct rc_map_list cec_map = { +struct rc_map_list cec_map = { .map = { .scan = cec, .size = ARRAY_SIZE(cec), @@ -160,19 +170,3 @@ static struct rc_map_list cec_map = { .name = RC_MAP_CEC, } }; - -static int __init init_rc_map_cec(void) -{ - return rc_map_register(&cec_map); -} - -static void __exit exit_rc_map_cec(void) -{ - rc_map_unregister(&cec_map); -} - -module_init(init_rc_map_cec); -module_exit(exit_rc_map_cec); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kamil Debski"); diff --git a/drivers/media/rc/mceusb.c b/drivers/media/rc/mceusb.c index f1dbd059ed08..5642595a057e 100644 --- a/drivers/media/rc/mceusb.c +++ b/drivers/media/rc/mceusb.c @@ -701,11 +701,18 @@ static void mceusb_dev_printdata(struct mceusb_dev *ir, u8 *buf, int buf_len, data[0], data[1]); break; case MCE_RSP_EQIRCFS: + if (!data[0] && !data[1]) { + dev_dbg(dev, "%s: no carrier", inout); + break; + } + // prescaler should make sense + if (data[0] > 8) + break; period = DIV_ROUND_CLOSEST((1U << data[0] * 2) * (data[1] + 1), 10); if (!period) break; - carrier = (1000 * 1000) / period; + carrier = USEC_PER_SEC / period; dev_dbg(dev, "%s carrier of %u Hz (period %uus)", inout, carrier, period); break; @@ -1169,7 +1176,7 @@ static void mceusb_handle_command(struct mceusb_dev *ir, u8 *buf_in) switch (subcmd) { /* the one and only 5-byte return value command */ case MCE_RSP_GETPORTSTATUS: - if (buf_in[5] == 0) + if (buf_in[5] == 0 && *hi < 8) ir->txports_cabled |= 1 << *hi; break; diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c index 1fd62c1dac76..8e88dc8ea6c5 100644 --- a/drivers/media/rc/rc-main.c +++ b/drivers/media/rc/rc-main.c @@ -2069,6 +2069,9 @@ static int __init rc_core_init(void) led_trigger_register_simple("rc-feedback", &led_feedback); rc_map_register(&empty_map); +#ifdef CONFIG_MEDIA_CEC_RC + rc_map_register(&cec_map); +#endif return 0; } @@ -2078,6 +2081,9 @@ static void __exit rc_core_exit(void) lirc_dev_exit(); class_unregister(&rc_class); led_trigger_unregister_simple(led_feedback); +#ifdef CONFIG_MEDIA_CEC_RC + rc_map_unregister(&cec_map); +#endif rc_map_unregister(&empty_map); } diff --git a/drivers/media/test-drivers/vidtv/vidtv_psi.c b/drivers/media/test-drivers/vidtv/vidtv_psi.c index 4511a2a98405..1724bb485e67 100644 --- a/drivers/media/test-drivers/vidtv/vidtv_psi.c +++ b/drivers/media/test-drivers/vidtv/vidtv_psi.c @@ -1164,6 +1164,8 @@ u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args *args) struct vidtv_psi_desc *table_descriptor = args->pmt->descriptor; struct vidtv_psi_table_pmt_stream *stream = args->pmt->stream; struct vidtv_psi_desc *stream_descriptor; + u32 crc = INITIAL_CRC; + u32 nbytes = 0; struct header_write_args h_args = { .dest_buf = args->buf, .dest_offset = args->offset, @@ -1181,6 +1183,7 @@ u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args *args) .new_psi_section = false, .is_crc = false, .dest_buf_sz = args->buf_sz, + .crc = &crc, }; struct desc_write_args d_args = { .dest_buf = args->buf, @@ -1193,8 +1196,6 @@ u32 vidtv_psi_pmt_write_into(struct vidtv_psi_pmt_write_args *args) .pid = args->pid, .dest_buf_sz = args->buf_sz, }; - u32 crc = INITIAL_CRC; - u32 nbytes = 0; vidtv_psi_pmt_table_update_sec_len(args->pmt); diff --git a/drivers/media/tuners/qm1d1c0042.c b/drivers/media/tuners/qm1d1c0042.c index 0e26d22f0b26..53aa2558f71e 100644 --- a/drivers/media/tuners/qm1d1c0042.c +++ b/drivers/media/tuners/qm1d1c0042.c @@ -343,8 +343,10 @@ static int qm1d1c0042_init(struct dvb_frontend *fe) if (val == reg_initval[reg_index][0x00]) break; } - if (reg_index >= QM1D1C0042_NUM_REG_ROWS) + if (reg_index >= QM1D1C0042_NUM_REG_ROWS) { + ret = -EINVAL; goto failed; + } memcpy(state->regs, reg_initval[reg_index], QM1D1C0042_NUM_REGS); usleep_range(2000, 3000); diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c index 5a7a9522d46d..9ddda8d68ee0 100644 --- a/drivers/media/usb/dvb-usb-v2/lmedm04.c +++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c @@ -391,7 +391,7 @@ static int lme2510_int_read(struct dvb_usb_adapter *adap) ep = usb_pipe_endpoint(d->udev, lme_int->lme_urb->pipe); if (usb_endpoint_type(&ep->desc) == USB_ENDPOINT_XFER_BULK) - lme_int->lme_urb->pipe = usb_rcvbulkpipe(d->udev, 0xa), + lme_int->lme_urb->pipe = usb_rcvbulkpipe(d->udev, 0xa); usb_submit_urb(lme_int->lme_urb, GFP_ATOMIC); info("INT Interrupt Service Started"); diff --git a/drivers/media/usb/em28xx/em28xx-core.c b/drivers/media/usb/em28xx/em28xx-core.c index e6088b5d1b80..3daa64bb1e1d 100644 --- a/drivers/media/usb/em28xx/em28xx-core.c +++ b/drivers/media/usb/em28xx/em28xx-core.c @@ -956,14 +956,10 @@ int em28xx_alloc_urbs(struct em28xx *dev, enum em28xx_mode mode, int xfer_bulk, usb_bufs->buf[i] = kzalloc(sb_size, GFP_KERNEL); if (!usb_bufs->buf[i]) { - em28xx_uninit_usb_xfer(dev, mode); - for (i--; i >= 0; i--) kfree(usb_bufs->buf[i]); - kfree(usb_bufs->buf); - usb_bufs->buf = NULL; - + em28xx_uninit_usb_xfer(dev, mode); return -ENOMEM; } diff --git a/drivers/media/usb/pwc/pwc-if.c b/drivers/media/usb/pwc/pwc-if.c index 61869636ec61..5e3339cc31c0 100644 --- a/drivers/media/usb/pwc/pwc-if.c +++ b/drivers/media/usb/pwc/pwc-if.c @@ -155,16 +155,17 @@ static const struct video_device pwc_template = { /***************************************************************************/ /* Private functions */ -static void *pwc_alloc_urb_buffer(struct device *dev, +static void *pwc_alloc_urb_buffer(struct usb_device *dev, size_t size, dma_addr_t *dma_handle) { + struct device *dmadev = dev->bus->sysdev; void *buffer = kmalloc(size, GFP_KERNEL); if (!buffer) return NULL; - *dma_handle = dma_map_single(dev, buffer, size, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, *dma_handle)) { + *dma_handle = dma_map_single(dmadev, buffer, size, DMA_FROM_DEVICE); + if (dma_mapping_error(dmadev, *dma_handle)) { kfree(buffer); return NULL; } @@ -172,12 +173,14 @@ static void *pwc_alloc_urb_buffer(struct device *dev, return buffer; } -static void pwc_free_urb_buffer(struct device *dev, +static void pwc_free_urb_buffer(struct usb_device *dev, size_t size, void *buffer, dma_addr_t dma_handle) { - dma_unmap_single(dev, dma_handle, size, DMA_FROM_DEVICE); + struct device *dmadev = dev->bus->sysdev; + + dma_unmap_single(dmadev, dma_handle, size, DMA_FROM_DEVICE); kfree(buffer); } @@ -282,6 +285,7 @@ static void pwc_frame_complete(struct pwc_device *pdev) static void pwc_isoc_handler(struct urb *urb) { struct pwc_device *pdev = (struct pwc_device *)urb->context; + struct device *dmadev = urb->dev->bus->sysdev; int i, fst, flen; unsigned char *iso_buf = NULL; @@ -328,7 +332,7 @@ static void pwc_isoc_handler(struct urb *urb) /* Reset ISOC error counter. We did get here, after all. */ pdev->visoc_errors = 0; - dma_sync_single_for_cpu(&urb->dev->dev, + dma_sync_single_for_cpu(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); @@ -379,7 +383,7 @@ static void pwc_isoc_handler(struct urb *urb) pdev->vlast_packet_size = flen; } - dma_sync_single_for_device(&urb->dev->dev, + dma_sync_single_for_device(dmadev, urb->transfer_dma, urb->transfer_buffer_length, DMA_FROM_DEVICE); @@ -461,7 +465,7 @@ static int pwc_isoc_init(struct pwc_device *pdev) urb->pipe = usb_rcvisocpipe(udev, pdev->vendpoint); urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP; urb->transfer_buffer_length = ISO_BUFFER_SIZE; - urb->transfer_buffer = pwc_alloc_urb_buffer(&udev->dev, + urb->transfer_buffer = pwc_alloc_urb_buffer(udev, urb->transfer_buffer_length, &urb->transfer_dma); if (urb->transfer_buffer == NULL) { @@ -524,7 +528,7 @@ static void pwc_iso_free(struct pwc_device *pdev) if (urb) { PWC_DEBUG_MEMORY("Freeing URB\n"); if (urb->transfer_buffer) - pwc_free_urb_buffer(&urb->dev->dev, + pwc_free_urb_buffer(urb->dev, urb->transfer_buffer_length, urb->transfer_buffer, urb->transfer_dma); diff --git a/drivers/media/usb/tm6000/tm6000-dvb.c b/drivers/media/usb/tm6000/tm6000-dvb.c index 19c90fa9e443..293a460f4616 100644 --- a/drivers/media/usb/tm6000/tm6000-dvb.c +++ b/drivers/media/usb/tm6000/tm6000-dvb.c @@ -141,6 +141,10 @@ static int tm6000_start_stream(struct tm6000_core *dev) if (ret < 0) { printk(KERN_ERR "tm6000: error %i in %s during pipe reset\n", ret, __func__); + + kfree(dvb->bulk_urb->transfer_buffer); + usb_free_urb(dvb->bulk_urb); + dvb->bulk_urb = NULL; return ret; } else printk(KERN_ERR "tm6000: pipe reset\n"); diff --git a/drivers/media/usb/usbtv/usbtv-audio.c b/drivers/media/usb/usbtv/usbtv-audio.c index b57e94fb1977..333bd305a4f9 100644 --- a/drivers/media/usb/usbtv/usbtv-audio.c +++ b/drivers/media/usb/usbtv/usbtv-audio.c @@ -371,7 +371,7 @@ void usbtv_audio_free(struct usbtv *usbtv) cancel_work_sync(&usbtv->snd_trigger); if (usbtv->snd && usbtv->udev) { - snd_card_free(usbtv->snd); + snd_card_free_when_closed(usbtv->snd); usbtv->snd = NULL; } } diff --git a/drivers/media/usb/uvc/uvc_driver.c b/drivers/media/usb/uvc/uvc_driver.c index ddb9eaa11be7..5ad528264135 100644 --- a/drivers/media/usb/uvc/uvc_driver.c +++ b/drivers/media/usb/uvc/uvc_driver.c @@ -1028,7 +1028,10 @@ static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, unsigned int i; extra_size = roundup(extra_size, sizeof(*entity->pads)); - num_inputs = (type & UVC_TERM_OUTPUT) ? num_pads : num_pads - 1; + if (num_pads) + num_inputs = type & UVC_TERM_OUTPUT ? num_pads : num_pads - 1; + else + num_inputs = 0; size = sizeof(*entity) + extra_size + sizeof(*entity->pads) * num_pads + num_inputs; entity = kzalloc(size, GFP_KERNEL); @@ -1044,7 +1047,7 @@ static struct uvc_entity *uvc_alloc_entity(u16 type, u8 id, for (i = 0; i < num_inputs; ++i) entity->pads[i].flags = MEDIA_PAD_FL_SINK; - if (!UVC_ENTITY_IS_OTERM(entity)) + if (!UVC_ENTITY_IS_OTERM(entity) && num_pads) entity->pads[num_pads-1].flags = MEDIA_PAD_FL_SOURCE; entity->bNrInPins = num_inputs; diff --git a/drivers/media/usb/uvc/uvc_v4l2.c b/drivers/media/usb/uvc/uvc_v4l2.c index fa06bfa174ad..c7172b8952a9 100644 --- a/drivers/media/usb/uvc/uvc_v4l2.c +++ b/drivers/media/usb/uvc/uvc_v4l2.c @@ -248,7 +248,9 @@ static int uvc_v4l2_try_format(struct uvc_streaming *stream, goto done; /* After the probe, update fmt with the values returned from - * negotiation with the device. + * negotiation with the device. Some devices return invalid bFormatIndex + * and bFrameIndex values, in which case we can only assume they have + * accepted the requested format as-is. */ for (i = 0; i < stream->nformats; ++i) { if (probe->bFormatIndex == stream->format[i].index) { @@ -257,11 +259,10 @@ static int uvc_v4l2_try_format(struct uvc_streaming *stream, } } - if (i == stream->nformats) { - uvc_trace(UVC_TRACE_FORMAT, "Unknown bFormatIndex %u\n", + if (i == stream->nformats) + uvc_trace(UVC_TRACE_FORMAT, + "Unknown bFormatIndex %u, using default\n", probe->bFormatIndex); - return -EINVAL; - } for (i = 0; i < format->nframes; ++i) { if (probe->bFrameIndex == format->frame[i].bFrameIndex) { @@ -270,11 +271,10 @@ static int uvc_v4l2_try_format(struct uvc_streaming *stream, } } - if (i == format->nframes) { - uvc_trace(UVC_TRACE_FORMAT, "Unknown bFrameIndex %u\n", + if (i == format->nframes) + uvc_trace(UVC_TRACE_FORMAT, + "Unknown bFrameIndex %u, using default\n", probe->bFrameIndex); - return -EINVAL; - } fmt->fmt.pix.width = frame->wWidth; fmt->fmt.pix.height = frame->wHeight; diff --git a/drivers/media/usb/zr364xx/zr364xx.c b/drivers/media/usb/zr364xx/zr364xx.c index 1e1c6b4d1874..d29b861367ea 100644 --- a/drivers/media/usb/zr364xx/zr364xx.c +++ b/drivers/media/usb/zr364xx/zr364xx.c @@ -1181,15 +1181,11 @@ static int zr364xx_open(struct file *file) return err; } -static void zr364xx_release(struct v4l2_device *v4l2_dev) +static void zr364xx_board_uninit(struct zr364xx_camera *cam) { - struct zr364xx_camera *cam = - container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev); unsigned long i; - v4l2_device_unregister(&cam->v4l2_dev); - - videobuf_mmap_free(&cam->vb_vidq); + zr364xx_stop_readpipe(cam); /* release sys buffers */ for (i = 0; i < FRAMES; i++) { @@ -1200,9 +1196,19 @@ static void zr364xx_release(struct v4l2_device *v4l2_dev) cam->buffer.frame[i].lpvbits = NULL; } - v4l2_ctrl_handler_free(&cam->ctrl_handler); /* release transfer buffer */ kfree(cam->pipe->transfer_buffer); +} + +static void zr364xx_release(struct v4l2_device *v4l2_dev) +{ + struct zr364xx_camera *cam = + container_of(v4l2_dev, struct zr364xx_camera, v4l2_dev); + + videobuf_mmap_free(&cam->vb_vidq); + v4l2_ctrl_handler_free(&cam->ctrl_handler); + zr364xx_board_uninit(cam); + v4l2_device_unregister(&cam->v4l2_dev); kfree(cam); } @@ -1376,11 +1382,14 @@ static int zr364xx_board_init(struct zr364xx_camera *cam) /* start read pipe */ err = zr364xx_start_readpipe(cam); if (err) - goto err_free; + goto err_free_frames; DBG(": board initialized\n"); return 0; +err_free_frames: + for (i = 0; i < FRAMES; i++) + vfree(cam->buffer.frame[i].lpvbits); err_free: kfree(cam->pipe->transfer_buffer); cam->pipe->transfer_buffer = NULL; @@ -1409,12 +1418,10 @@ static int zr364xx_probe(struct usb_interface *intf, if (!cam) return -ENOMEM; - cam->v4l2_dev.release = zr364xx_release; err = v4l2_device_register(&intf->dev, &cam->v4l2_dev); if (err < 0) { dev_err(&udev->dev, "couldn't register v4l2_device\n"); - kfree(cam); - return err; + goto free_cam; } hdl = &cam->ctrl_handler; v4l2_ctrl_handler_init(hdl, 1); @@ -1423,7 +1430,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (hdl->error) { err = hdl->error; dev_err(&udev->dev, "couldn't register control\n"); - goto fail; + goto unregister; } /* save the init method used by this camera */ cam->method = id->driver_info; @@ -1496,7 +1503,7 @@ static int zr364xx_probe(struct usb_interface *intf, if (!cam->read_endpoint) { err = -ENOMEM; dev_err(&intf->dev, "Could not find bulk-in endpoint\n"); - goto fail; + goto unregister; } /* v4l */ @@ -1507,10 +1514,11 @@ static int zr364xx_probe(struct usb_interface *intf, /* load zr364xx board specific */ err = zr364xx_board_init(cam); - if (!err) - err = v4l2_ctrl_handler_setup(hdl); if (err) - goto fail; + goto unregister; + err = v4l2_ctrl_handler_setup(hdl); + if (err) + goto board_uninit; spin_lock_init(&cam->slock); @@ -1525,16 +1533,21 @@ static int zr364xx_probe(struct usb_interface *intf, err = video_register_device(&cam->vdev, VFL_TYPE_VIDEO, -1); if (err) { dev_err(&udev->dev, "video_register_device failed\n"); - goto fail; + goto free_handler; } + cam->v4l2_dev.release = zr364xx_release; dev_info(&udev->dev, DRIVER_DESC " controlling device %s\n", video_device_node_name(&cam->vdev)); return 0; -fail: +free_handler: v4l2_ctrl_handler_free(hdl); +board_uninit: + zr364xx_board_uninit(cam); +unregister: v4l2_device_unregister(&cam->v4l2_dev); +free_cam: kfree(cam); return err; } diff --git a/drivers/media/v4l2-core/Kconfig b/drivers/media/v4l2-core/Kconfig index bf49f83cb86f..56664074a4d4 100644 --- a/drivers/media/v4l2-core/Kconfig +++ b/drivers/media/v4l2-core/Kconfig @@ -48,6 +48,11 @@ config VIDEO_TUNER config V4L2_JPEG_HELPER tristate +config V4L2_LOOPBACK + tristate "V4L2 loopback device" + help + V4L2 loopback device + # Used by drivers that need v4l2-h264.ko config V4L2_H264 tristate diff --git a/drivers/media/v4l2-core/Makefile b/drivers/media/v4l2-core/Makefile index 2ef0c7c958a2..e15772bd1316 100644 --- a/drivers/media/v4l2-core/Makefile +++ b/drivers/media/v4l2-core/Makefile @@ -27,6 +27,8 @@ obj-$(CONFIG_V4L2_FLASH_LED_CLASS) += v4l2-flash-led-class.o obj-$(CONFIG_V4L2_JPEG_HELPER) += v4l2-jpeg.o +obj-$(CONFIG_V4L2_LOOPBACK) += v4l2loopback.o + obj-$(CONFIG_VIDEOBUF_GEN) += videobuf-core.o obj-$(CONFIG_VIDEOBUF_DMA_SG) += videobuf-dma-sg.o obj-$(CONFIG_VIDEOBUF_DMA_CONTIG) += videobuf-dma-contig.o diff --git a/drivers/media/v4l2-core/v4l2-ctrls.c b/drivers/media/v4l2-core/v4l2-ctrls.c index 5cbe0ffbf501..9dc151431a5c 100644 --- a/drivers/media/v4l2-core/v4l2-ctrls.c +++ b/drivers/media/v4l2-core/v4l2-ctrls.c @@ -2165,7 +2165,8 @@ static int std_validate(const struct v4l2_ctrl *ctrl, u32 idx, case V4L2_CTRL_TYPE_INTEGER_MENU: if (ptr.p_s32[idx] < ctrl->minimum || ptr.p_s32[idx] > ctrl->maximum) return -ERANGE; - if (ctrl->menu_skip_mask & (1ULL << ptr.p_s32[idx])) + if (ptr.p_s32[idx] < BITS_PER_LONG_LONG && + (ctrl->menu_skip_mask & BIT_ULL(ptr.p_s32[idx]))) return -EINVAL; if (ctrl->type == V4L2_CTRL_TYPE_MENU && ctrl->qmenu[ptr.p_s32[idx]][0] == '\0') diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 3198abdd538c..9906b41004e9 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -3283,7 +3283,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, v4l2_kioctl func) { char sbuf[128]; - void *mbuf = NULL; + void *mbuf = NULL, *array_buf = NULL; void *parg = (void *)arg; long err = -EINVAL; bool has_array_args; @@ -3318,27 +3318,21 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, has_array_args = err; if (has_array_args) { - /* - * When adding new types of array args, make sure that the - * parent argument to ioctl (which contains the pointer to the - * array) fits into sbuf (so that mbuf will still remain - * unused up to here). - */ - mbuf = kvmalloc(array_size, GFP_KERNEL); + array_buf = kvmalloc(array_size, GFP_KERNEL); err = -ENOMEM; - if (NULL == mbuf) + if (array_buf == NULL) goto out_array_args; err = -EFAULT; if (in_compat_syscall()) - err = v4l2_compat_get_array_args(file, mbuf, user_ptr, - array_size, orig_cmd, - parg); + err = v4l2_compat_get_array_args(file, array_buf, + user_ptr, array_size, + orig_cmd, parg); else - err = copy_from_user(mbuf, user_ptr, array_size) ? + err = copy_from_user(array_buf, user_ptr, array_size) ? -EFAULT : 0; if (err) goto out_array_args; - *kernel_ptr = mbuf; + *kernel_ptr = array_buf; } /* Handles IOCTL */ @@ -3360,12 +3354,13 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, if (in_compat_syscall()) { int put_err; - put_err = v4l2_compat_put_array_args(file, user_ptr, mbuf, - array_size, orig_cmd, - parg); + put_err = v4l2_compat_put_array_args(file, user_ptr, + array_buf, + array_size, + orig_cmd, parg); if (put_err) err = put_err; - } else if (copy_to_user(user_ptr, mbuf, array_size)) { + } else if (copy_to_user(user_ptr, array_buf, array_size)) { err = -EFAULT; } goto out_array_args; @@ -3381,6 +3376,7 @@ video_usercopy(struct file *file, unsigned int orig_cmd, unsigned long arg, if (video_put_user((void __user *)arg, parg, cmd, orig_cmd)) err = -EFAULT; out: + kvfree(array_buf); kvfree(mbuf); return err; } diff --git a/drivers/media/v4l2-core/v4l2loopback.c b/drivers/media/v4l2-core/v4l2loopback.c new file mode 100644 index 000000000000..74af58d558c5 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.c @@ -0,0 +1,2904 @@ +/* -*- c-file-style: "linux" -*- */ +/* + * v4l2loopback.c -- video4linux2 loopback driver + * + * Copyright (C) 2005-2009 Vasily Levin (vasaka@gmail.com) + * Copyright (C) 2010-2019 IOhannes m zmoelnig (zmoelnig@iem.at) + * Copyright (C) 2011 Stefan Diewald (stefan.diewald@mytum.de) + * Copyright (C) 2012 Anton Novikov (random.plant@gmail.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) +#define HAVE__V4L2_DEVICE +#include +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 36) +#define HAVE__V4L2_CTRLS +#include +#endif +#include + +#include +#include "v4l2loopback.h" + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1) +#define kstrtoul strict_strtoul +#endif + +#if defined(timer_setup) && defined(from_timer) +#define HAVE_TIMER_SETUP +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 7, 0) +#define VFL_TYPE_VIDEO VFL_TYPE_GRABBER +#endif + +#define V4L2LOOPBACK_VERSION_CODE \ + KERNEL_VERSION(V4L2LOOPBACK_VERSION_MAJOR, V4L2LOOPBACK_VERSION_MINOR, \ + V4L2LOOPBACK_VERSION_BUGFIX) + +MODULE_DESCRIPTION("V4L2 loopback video device"); +MODULE_AUTHOR("Vasily Levin, " + "IOhannes m zmoelnig ," + "Stefan Diewald," + "Anton Novikov" + "et al."); +MODULE_LICENSE("GPL"); + +/* + * helpers + */ +#define STRINGIFY(s) #s +#define STRINGIFY2(s) STRINGIFY(s) + +#define dprintk(fmt, args...) \ + do { \ + if (debug > 0) { \ + printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \ + __LINE__) "]: " fmt, \ + ##args); \ + } \ + } while (0) + +#define MARK() \ + do { \ + if (debug > 1) { \ + printk(KERN_INFO "%s:%d[%s]\n", __FILE__, __LINE__, \ + __func__); \ + } \ + } while (0) + +#define dprintkrw(fmt, args...) \ + do { \ + if (debug > 2) { \ + printk(KERN_INFO "v4l2-loopback[" STRINGIFY2( \ + __LINE__) "]: " fmt, \ + ##args); \ + } \ + } while (0) + +/* + * compatibility hacks + */ + +#ifndef HAVE__V4L2_CTRLS +struct v4l2_ctrl_handler { + int error; +}; +struct v4l2_ctrl_config { + void *ops; + u32 id; + const char *name; + int type; + s32 min; + s32 max; + u32 step; + s32 def; +}; +int v4l2_ctrl_handler_init(struct v4l2_ctrl_handler *hdl, + unsigned nr_of_controls_hint) +{ + hdl->error = 0; + return 0; +} +void v4l2_ctrl_handler_free(struct v4l2_ctrl_handler *hdl) +{ +} +void *v4l2_ctrl_new_custom(struct v4l2_ctrl_handler *hdl, + const struct v4l2_ctrl_config *conf, void *priv) +{ + return NULL; +} +#endif /* HAVE__V4L2_CTRLS */ + +#ifndef HAVE__V4L2_DEVICE +/* dummy v4l2_device struct/functions */ +#define V4L2_DEVICE_NAME_SIZE (20 + 16) +struct v4l2_device { + char name[V4L2_DEVICE_NAME_SIZE]; + struct v4l2_ctrl_handler *ctrl_handler; +}; +static inline int v4l2_device_register(void *dev, void *v4l2_dev) +{ + return 0; +} +static inline void v4l2_device_unregister(struct v4l2_device *v4l2_dev) +{ + return; +} +#endif /* HAVE__V4L2_DEVICE */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 29) +#define v4l2_file_operations file_operations +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 37) +void *v4l2l_vzalloc(unsigned long size) +{ + void *data = vmalloc(size); + + memset(data, 0, size); + return data; +} +#else +#define v4l2l_vzalloc vzalloc +#endif + +static inline void v4l2l_get_timestamp(struct v4l2_buffer *b) +{ + /* ktime_get_ts is considered deprecated, so use ktime_get_ts64 if possible */ +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 17, 0) + struct timespec ts; + ktime_get_ts(&ts); +#else + struct timespec64 ts; + ktime_get_ts64(&ts); +#endif + + b->timestamp.tv_sec = ts.tv_sec; + b->timestamp.tv_usec = (ts.tv_nsec / NSEC_PER_USEC); +} + +#if !defined(__poll_t) +typedef unsigned __poll_t; +#endif + +/* module constants + * can be overridden during he build process using something like + * make KCPPFLAGS="-DMAX_DEVICES=100" + */ + +/* maximum number of v4l2loopback devices that can be created */ +#ifndef MAX_DEVICES +#define MAX_DEVICES 8 +#endif + +/* whether the default is to announce capabilities exclusively or not */ +#ifndef V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +#define V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS 0 +#endif + +/* when a producer is considered to have gone stale */ +#ifndef MAX_TIMEOUT +#define MAX_TIMEOUT (100 * 1000) /* in msecs */ +#endif + +/* max buffers that can be mapped, actually they + * are all mapped to max_buffers buffers */ +#ifndef MAX_BUFFERS +#define MAX_BUFFERS 32 +#endif + +/* module parameters */ +static int debug = 0; +module_param(debug, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(debug, "debugging level (higher values == more verbose)"); + +#define V4L2LOOPBACK_DEFAULT_MAX_BUFFERS 2 +static int max_buffers = V4L2LOOPBACK_DEFAULT_MAX_BUFFERS; +module_param(max_buffers, int, S_IRUGO); +MODULE_PARM_DESC(max_buffers, + "how many buffers should be allocated [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_MAX_BUFFERS) "]"); + +/* how many times a device can be opened + * the per-module default value can be overridden on a per-device basis using + * the /sys/devices interface + * + * note that max_openers should be at least 2 in order to get a working system: + * one opener for the producer and one opener for the consumer + * however, we leave that to the user + */ +#define V4L2LOOPBACK_DEFAULT_MAX_OPENERS 10 +static int max_openers = V4L2LOOPBACK_DEFAULT_MAX_OPENERS; +module_param(max_openers, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + max_openers, + "how many users can open the loopback device [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_MAX_OPENERS) "]"); + +static int devices = -1; +module_param(devices, int, 0); +MODULE_PARM_DESC(devices, "how many devices should be created"); + +static int video_nr[MAX_DEVICES] = { [0 ...(MAX_DEVICES - 1)] = -1 }; +module_param_array(video_nr, int, NULL, 0444); +MODULE_PARM_DESC(video_nr, + "video device numbers (-1=auto, 0=/dev/video0, etc.)"); + +static char *card_label[MAX_DEVICES]; +module_param_array(card_label, charp, NULL, 0000); +MODULE_PARM_DESC(card_label, "card labels for each device"); + +static bool exclusive_caps[MAX_DEVICES] = { + [0 ...(MAX_DEVICES - 1)] = V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS +}; +module_param_array(exclusive_caps, bool, NULL, 0444); +/* FIXXME: wording */ +MODULE_PARM_DESC( + exclusive_caps, + "whether to announce OUTPUT/CAPTURE capabilities exclusively or not [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS) "]"); + +/* format specifications */ +#define V4L2LOOPBACK_SIZE_MIN_WIDTH 48 +#define V4L2LOOPBACK_SIZE_MIN_HEIGHT 32 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH 8192 +#define V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT 8192 + +#define V4L2LOOPBACK_SIZE_DEFAULT_WIDTH 640 +#define V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT 480 + +static int max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; +module_param(max_width, int, S_IRUGO); +MODULE_PARM_DESC(max_width, "maximum allowed frame width [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH) "]"); +static int max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; +module_param(max_height, int, S_IRUGO); +MODULE_PARM_DESC(max_height, + "maximum allowed frame height [DEFAULT: " STRINGIFY2( + V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT) "]"); + +static DEFINE_IDR(v4l2loopback_index_idr); +static DEFINE_MUTEX(v4l2loopback_ctl_mutex); + +/* control IDs */ +#ifndef HAVE__V4L2_CTRLS +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_PRIVATE_BASE) +#else +#define V4L2LOOPBACK_CID_BASE (V4L2_CID_USER_BASE | 0xf000) +#endif +#define CID_KEEP_FORMAT (V4L2LOOPBACK_CID_BASE + 0) +#define CID_SUSTAIN_FRAMERATE (V4L2LOOPBACK_CID_BASE + 1) +#define CID_TIMEOUT (V4L2LOOPBACK_CID_BASE + 2) +#define CID_TIMEOUT_IMAGE_IO (V4L2LOOPBACK_CID_BASE + 3) + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl); +static const struct v4l2_ctrl_ops v4l2loopback_ctrl_ops = { + .s_ctrl = v4l2loopback_s_ctrl, +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_keepformat = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_KEEP_FORMAT, + .name = "keep_format", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_sustainframerate = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_SUSTAIN_FRAMERATE, + .name = "sustain_framerate", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeout = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT, + .name = "timeout", + .type = V4L2_CTRL_TYPE_INTEGER, + .min = 0, + .max = MAX_TIMEOUT, + .step = 1, + .def = 0, + // clang-format on +}; +static const struct v4l2_ctrl_config v4l2loopback_ctrl_timeoutimageio = { + // clang-format off + .ops = &v4l2loopback_ctrl_ops, + .id = CID_TIMEOUT_IMAGE_IO, + .name = "timeout_image_io", + .type = V4L2_CTRL_TYPE_BOOLEAN, + .min = 0, + .max = 1, + .step = 1, + .def = 0, + // clang-format on +}; + +/* module structures */ +struct v4l2loopback_private { + int device_nr; +}; + +/* TODO(vasaka) use typenames which are common to kernel, but first find out if + * it is needed */ +/* struct keeping state and settings of loopback device */ + +struct v4l2l_buffer { + struct v4l2_buffer buffer; + struct list_head list_head; + int use_count; +}; + +struct v4l2_loopback_device { + struct v4l2_device v4l2_dev; + struct v4l2_ctrl_handler ctrl_handler; + struct video_device *vdev; + /* pixel and stream format */ + struct v4l2_pix_format pix_format; + struct v4l2_captureparm capture_param; + unsigned long frame_jiffies; + + /* ctrls */ + int keep_format; /* CID_KEEP_FORMAT; stay ready_for_capture even when all + openers close() the device */ + int sustain_framerate; /* CID_SUSTAIN_FRAMERATE; duplicate frames to maintain + (close to) nominal framerate */ + + /* buffers stuff */ + u8 *image; /* pointer to actual buffers data */ + unsigned long int imagesize; /* size of buffers data */ + int buffers_number; /* should not be big, 4 is a good choice */ + struct v4l2l_buffer buffers[MAX_BUFFERS]; /* inner driver buffers */ + int used_buffers; /* number of the actually used buffers */ + int max_openers; /* how many times can this device be opened */ + + int write_position; /* number of last written frame + 1 */ + struct list_head outbufs_list; /* buffers in output DQBUF order */ + int bufpos2index + [MAX_BUFFERS]; /* mapping of (read/write_position % used_buffers) + * to inner buffer index */ + long buffer_size; + + /* sustain_framerate stuff */ + struct timer_list sustain_timer; + unsigned int reread_count; + + /* timeout stuff */ + unsigned long timeout_jiffies; /* CID_TIMEOUT; 0 means disabled */ + int timeout_image_io; /* CID_TIMEOUT_IMAGE_IO; next opener will + * read/write to timeout_image */ + u8 *timeout_image; /* copy of it will be captured when timeout passes */ + struct v4l2l_buffer timeout_image_buffer; + struct timer_list timeout_timer; + int timeout_happened; + + /* sync stuff */ + atomic_t open_count; + + int ready_for_capture; /* set to the number of writers that opened the + * device and negotiated format. */ + int ready_for_output; /* set to true when no writer is currently attached + * this differs slightly from !ready_for_capture, + * e.g. when using fallback images */ + int announce_all_caps; /* set to false, if device caps (OUTPUT/CAPTURE) + * should only be announced if the resp. "ready" + * flag is set; default=TRUE */ + + int max_width; + int max_height; + + char card_label[32]; + + wait_queue_head_t read_event; + spinlock_t lock; +}; + +/* types of opener shows what opener wants to do with loopback */ +enum opener_type { + // clang-format off + UNNEGOTIATED = 0, + READER = 1, + WRITER = 2, + // clang-format on +}; + +/* struct keeping state and type of opener */ +struct v4l2_loopback_opener { + enum opener_type type; + int vidioc_enum_frameintervals_calls; + int read_position; /* number of last processed frame + 1 or + * write_position - 1 if reader went out of sync */ + unsigned int reread_count; + struct v4l2_buffer *buffers; + int buffers_number; /* should not be big, 4 is a good choice */ + int timeout_image_io; + + struct v4l2_fh fh; +}; + +#define fh_to_opener(ptr) container_of((ptr), struct v4l2_loopback_opener, fh) + +/* this is heavily inspired by the bttv driver found in the linux kernel */ +struct v4l2l_format { + char *name; + int fourcc; /* video4linux 2 */ + int depth; /* bit/pixel */ + int flags; +}; +/* set the v4l2l_format.flags to PLANAR for non-packed formats */ +#define FORMAT_FLAGS_PLANAR 0x01 +#define FORMAT_FLAGS_COMPRESSED 0x02 + +static const struct v4l2l_format formats[] = { +#include "v4l2loopback_formats.h" +}; + +static const unsigned int FORMATS = ARRAY_SIZE(formats); + +static char *fourcc2str(unsigned int fourcc, char buf[4]) +{ + buf[0] = (fourcc >> 0) & 0xFF; + buf[1] = (fourcc >> 8) & 0xFF; + buf[2] = (fourcc >> 16) & 0xFF; + buf[3] = (fourcc >> 24) & 0xFF; + + return buf; +} + +static const struct v4l2l_format *format_by_fourcc(int fourcc) +{ + unsigned int i; + + for (i = 0; i < FORMATS; i++) { + if (formats[i].fourcc == fourcc) + return formats + i; + } + + dprintk("unsupported format '%c%c%c%c'\n", (fourcc >> 0) & 0xFF, + (fourcc >> 8) & 0xFF, (fourcc >> 16) & 0xFF, + (fourcc >> 24) & 0xFF); + return NULL; +} + +static void pix_format_set_size(struct v4l2_pix_format *f, + const struct v4l2l_format *fmt, + unsigned int width, unsigned int height) +{ + f->width = width; + f->height = height; + + if (fmt->flags & FORMAT_FLAGS_PLANAR) { + f->bytesperline = width; /* Y plane */ + f->sizeimage = (width * height * fmt->depth) >> 3; + } else if (fmt->flags & FORMAT_FLAGS_COMPRESSED) { + /* doesn't make sense for compressed formats */ + f->bytesperline = 0; + f->sizeimage = (width * height * fmt->depth) >> 3; + } else { + f->bytesperline = (width * fmt->depth) >> 3; + f->sizeimage = height * f->bytesperline; + } +} + +static int set_timeperframe(struct v4l2_loopback_device *dev, + struct v4l2_fract *tpf) +{ + if ((tpf->denominator < 1) || (tpf->numerator < 1)) { + return -EINVAL; + } + dev->capture_param.timeperframe = *tpf; + dev->frame_jiffies = max(1UL, msecs_to_jiffies(1000) * tpf->numerator / + tpf->denominator); + return 0; +} + +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd); + +/* device attributes */ +/* available via sysfs: /sys/devices/virtual/video4linux/video* */ + +static ssize_t attr_show_format(struct device *cd, + struct device_attribute *attr, char *buf) +{ + /* gets the current format as "FOURCC:WxH@f/s", e.g. "YUYV:320x240@1000/30" */ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + const struct v4l2_fract *tpf; + char buf4cc[5], buf_fps[32]; + + if (!dev || !dev->ready_for_capture) + return 0; + tpf = &dev->capture_param.timeperframe; + + fourcc2str(dev->pix_format.pixelformat, buf4cc); + buf4cc[4] = 0; + if (tpf->numerator == 1) + snprintf(buf_fps, sizeof(buf_fps), "%d", tpf->denominator); + else + snprintf(buf_fps, sizeof(buf_fps), "%d/%d", tpf->denominator, + tpf->numerator); + return sprintf(buf, "%4s:%dx%d@%s\n", buf4cc, dev->pix_format.width, + dev->pix_format.height, buf_fps); +} + +static ssize_t attr_store_format(struct device *cd, + struct device_attribute *attr, const char *buf, + size_t len) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + int fps_num = 0, fps_den = 1; + + /* only fps changing is supported */ + if (sscanf(buf, "@%d/%d", &fps_num, &fps_den) > 0) { + struct v4l2_fract f = { .numerator = fps_den, + .denominator = fps_num }; + int err = 0; + if ((err = set_timeperframe(dev, &f)) < 0) + return err; + return len; + } + return -EINVAL; +} + +static DEVICE_ATTR(format, S_IRUGO | S_IWUSR, attr_show_format, + attr_store_format); + +static ssize_t attr_show_buffers(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + return sprintf(buf, "%d\n", dev->used_buffers); +} + +static DEVICE_ATTR(buffers, S_IRUGO, attr_show_buffers, NULL); + +static ssize_t attr_show_maxopeners(struct device *cd, + struct device_attribute *attr, char *buf) +{ + struct v4l2_loopback_device *dev = v4l2loopback_cd2dev(cd); + + return sprintf(buf, "%d\n", dev->max_openers); +} + +static ssize_t attr_store_maxopeners(struct device *cd, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct v4l2_loopback_device *dev = NULL; + unsigned long curr = 0; + + if (kstrtoul(buf, 0, &curr)) + return -EINVAL; + + dev = v4l2loopback_cd2dev(cd); + + if (dev->max_openers == curr) + return len; + + if (dev->open_count.counter > curr) { + /* request to limit to less openers as are currently attached to us */ + return -EINVAL; + } + + dev->max_openers = (int)curr; + + return len; +} + +static DEVICE_ATTR(max_openers, S_IRUGO | S_IWUSR, attr_show_maxopeners, + attr_store_maxopeners); + +static void v4l2loopback_remove_sysfs(struct video_device *vdev) +{ +#define V4L2_SYSFS_DESTROY(x) device_remove_file(&vdev->dev, &dev_attr_##x) + + if (vdev) { + V4L2_SYSFS_DESTROY(format); + V4L2_SYSFS_DESTROY(buffers); + V4L2_SYSFS_DESTROY(max_openers); + /* ... */ + } +} + +static void v4l2loopback_create_sysfs(struct video_device *vdev) +{ + int res = 0; + +#define V4L2_SYSFS_CREATE(x) \ + res = device_create_file(&vdev->dev, &dev_attr_##x); \ + if (res < 0) \ + break + if (!vdev) + return; + do { + V4L2_SYSFS_CREATE(format); + V4L2_SYSFS_CREATE(buffers); + V4L2_SYSFS_CREATE(max_openers); + /* ... */ + } while (0); + + if (res >= 0) + return; + dev_err(&vdev->dev, "%s error: %d\n", __func__, res); +} + +/* global module data */ +/* find a device based on it's device-number (e.g. '3' for /dev/video3) */ +struct v4l2loopback_lookup_cb_data { + int device_nr; + struct v4l2_loopback_device *device; +}; +static int v4l2loopback_lookup_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *device = ptr; + struct v4l2loopback_lookup_cb_data *cbdata = data; + if (cbdata && device && device->vdev) { + if (device->vdev->num == cbdata->device_nr) { + cbdata->device = device; + cbdata->device_nr = id; + return 1; + } + } + return 0; +} +static int v4l2loopback_lookup(int device_nr, + struct v4l2_loopback_device **device) +{ + struct v4l2loopback_lookup_cb_data data = { + .device_nr = device_nr, + .device = NULL, + }; + int err = idr_for_each(&v4l2loopback_index_idr, &v4l2loopback_lookup_cb, + &data); + if (1 == err) { + if (device) + *device = data.device; + return data.device_nr; + } + return -ENODEV; +} +static struct v4l2_loopback_device *v4l2loopback_cd2dev(struct device *cd) +{ + struct video_device *loopdev = to_video_device(cd); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +static struct v4l2_loopback_device *v4l2loopback_getdevice(struct file *f) +{ + struct video_device *loopdev = video_devdata(f); + struct v4l2loopback_private *ptr = + (struct v4l2loopback_private *)video_get_drvdata(loopdev); + int nr = ptr->device_nr; + + return idr_find(&v4l2loopback_index_idr, nr); +} + +/* forward declarations */ +static void init_buffers(struct v4l2_loopback_device *dev); +static int allocate_buffers(struct v4l2_loopback_device *dev); +static int free_buffers(struct v4l2_loopback_device *dev); +static void try_free_buffers(struct v4l2_loopback_device *dev); +static int allocate_timeout_image(struct v4l2_loopback_device *dev); +static void check_timers(struct v4l2_loopback_device *dev); +static const struct v4l2_file_operations v4l2_loopback_fops; +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops; + +/* Queue helpers */ +/* next functions sets buffer flags and adjusts counters accordingly */ +static inline void set_done(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags |= V4L2_BUF_FLAG_DONE; +} + +static inline void set_queued(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; + buffer->buffer.flags |= V4L2_BUF_FLAG_QUEUED; +} + +static inline void unset_flags(struct v4l2l_buffer *buffer) +{ + buffer->buffer.flags &= ~V4L2_BUF_FLAG_QUEUED; + buffer->buffer.flags &= ~V4L2_BUF_FLAG_DONE; +} + +/* V4L2 ioctl caps and params calls */ +/* returns device capabilities + * called on VIDIOC_QUERYCAP + */ +static int vidioc_querycap(struct file *file, void *priv, + struct v4l2_capability *cap) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + int labellen = (sizeof(cap->card) < sizeof(dev->card_label)) ? + sizeof(cap->card) : + sizeof(dev->card_label); + int device_nr = + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr; + __u32 capabilities = V4L2_CAP_STREAMING | V4L2_CAP_READWRITE; + + strlcpy(cap->driver, "v4l2 loopback", sizeof(cap->driver)); + snprintf(cap->card, labellen, dev->card_label); + snprintf(cap->bus_info, sizeof(cap->bus_info), + "platform:v4l2loopback-%03d", device_nr); + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 1, 0) + /* since 3.1.0, the v4l2-core system is supposed to set the version */ + cap->version = V4L2LOOPBACK_VERSION_CODE; +#endif + +#ifdef V4L2_CAP_VIDEO_M2M + capabilities |= V4L2_CAP_VIDEO_M2M; +#endif /* V4L2_CAP_VIDEO_M2M */ + + if (dev->announce_all_caps) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT; + } else { + if (dev->ready_for_capture) { + capabilities |= V4L2_CAP_VIDEO_CAPTURE; + } + if (dev->ready_for_output) { + capabilities |= V4L2_CAP_VIDEO_OUTPUT; + } + } + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + dev->vdev->device_caps = +#endif /* >=linux-4.7.0 */ + cap->device_caps = cap->capabilities = capabilities; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 3, 0) + cap->capabilities |= V4L2_CAP_DEVICE_CAPS; +#endif + + memset(cap->reserved, 0, sizeof(cap->reserved)); + return 0; +} + +static int vidioc_enum_framesizes(struct file *file, void *fh, + struct v4l2_frmsizeenum *argp) +{ + struct v4l2_loopback_device *dev; + + /* LATER: what does the index really mean? + * if it's about enumerating formats, we can safely ignore it + * (CHECK) + */ + + /* there can be only one... */ + if (argp->index) + return -EINVAL; + + dev = v4l2loopback_getdevice(file); + if (dev->ready_for_capture) { + /* format has already been negotiated + * cannot change during runtime + */ + argp->type = V4L2_FRMSIZE_TYPE_DISCRETE; + + argp->discrete.width = dev->pix_format.width; + argp->discrete.height = dev->pix_format.height; + } else { + /* if the format has not been negotiated yet, we accept anything + */ + argp->type = V4L2_FRMSIZE_TYPE_CONTINUOUS; + + argp->stepwise.min_width = V4L2LOOPBACK_SIZE_MIN_WIDTH; + argp->stepwise.min_height = V4L2LOOPBACK_SIZE_MIN_HEIGHT; + + argp->stepwise.max_width = dev->max_width; + argp->stepwise.max_height = dev->max_height; + + argp->stepwise.step_width = 1; + argp->stepwise.step_height = 1; + } + return 0; +} + +/* returns frameinterval (fps) for the set resolution + * called on VIDIOC_ENUM_FRAMEINTERVALS + */ +static int vidioc_enum_frameintervals(struct file *file, void *fh, + struct v4l2_frmivalenum *argp) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(fh); + + if (dev->ready_for_capture) { + if (opener->vidioc_enum_frameintervals_calls > 0) + return -EINVAL; + if (argp->width == dev->pix_format.width && + argp->height == dev->pix_format.height) { + argp->type = V4L2_FRMIVAL_TYPE_DISCRETE; + argp->discrete = dev->capture_param.timeperframe; + opener->vidioc_enum_frameintervals_calls++; + return 0; + } + return -EINVAL; + } + return 0; +} + +/* ------------------ CAPTURE ----------------------- */ + +/* returns device formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_enum_fmt_cap(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + if (f->index) + return -EINVAL; + if (dev->ready_for_capture) { + const __u32 format = dev->pix_format.pixelformat; + + snprintf(f->description, sizeof(f->description), "[%c%c%c%c]", + (format >> 0) & 0xFF, (format >> 8) & 0xFF, + (format >> 16) & 0xFF, (format >> 24) & 0xFF); + + f->pixelformat = dev->pix_format.pixelformat; + } else { + return -EINVAL; + } + f->flags = 0; + MARK(); + return 0; +} + +/* returns current video format format fmt + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_g_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + if (!dev->ready_for_capture) + return -EINVAL; + + fmt->fmt.pix = dev->pix_format; + MARK(); + return 0; +} + +/* checks if it is OK to change to format fmt; + * actual check is done by inner_try_fmt_cap + * just checking that pixelformat is OK and set other parameters, app should + * obey this decision + * called on VIDIOC_TRY_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_try_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + char buf[5]; + + dev = v4l2loopback_getdevice(file); + + if (0 == dev->ready_for_capture) { + dprintk("setting fmt_cap not possible yet\n"); + return -EBUSY; + } + + if (fmt->fmt.pix.pixelformat != dev->pix_format.pixelformat) + return -EINVAL; + + fmt->fmt.pix = dev->pix_format; + + buf[4] = 0; + dprintk("capFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf)); + return 0; +} + +/* sets new output format, if possible + * actually format is set by input and we even do not check it, just return + * current one, but it is possible to set subregions of input TODO(vasaka) + * called on VIDIOC_S_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_CAPTURE + */ +static int vidioc_s_fmt_cap(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return vidioc_try_fmt_cap(file, priv, fmt); +} + +/* ------------------ OUTPUT ----------------------- */ + +/* returns device formats; + * LATER: allow all formats + * called on VIDIOC_ENUM_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_enum_fmt_out(struct file *file, void *fh, + struct v4l2_fmtdesc *f) +{ + struct v4l2_loopback_device *dev; + const struct v4l2l_format *fmt; + + dev = v4l2loopback_getdevice(file); + + if (dev->ready_for_capture) { + const __u32 format = dev->pix_format.pixelformat; + + /* format has been fixed by the writer, so only one single format is supported */ + if (f->index) + return -EINVAL; + + fmt = format_by_fourcc(format); + if (NULL == fmt) + return -EINVAL; + + f->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + /* f->flags = ??; */ + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + + f->pixelformat = dev->pix_format.pixelformat; + } else { + /* fill in a dummy format */ + /* coverity[unsigned_compare] */ + if (f->index < 0 || f->index >= FORMATS) + return -EINVAL; + + fmt = &formats[f->index]; + + f->pixelformat = fmt->fourcc; + snprintf(f->description, sizeof(f->description), "%s", + fmt->name); + } + f->flags = 0; + + return 0; +} + +/* returns current video format format fmt */ +/* NOTE: this is called from the producer + * so if format has not been negotiated yet, + * it should return ALL of available formats, + * called on VIDIOC_G_FMT, with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_g_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* + * LATER: this should return the currently valid format + * gstreamer doesn't like it, if this returns -EINVAL, as it + * then concludes that there is _no_ valid format + * CHECK whether this assumption is wrong, + * or whether we have to always provide a valid format + */ + + fmt->fmt.pix = dev->pix_format; + return 0; +} + +/* checks if it is OK to change to format fmt; + * if format is negotiated do not change it + * called on VIDIOC_TRY_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_try_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* TODO(vasaka) loopback does not care about formats writer want to set, + * maybe it is a good idea to restrict format somehow */ + if (dev->ready_for_capture) { + fmt->fmt.pix = dev->pix_format; + } else { + __u32 w = fmt->fmt.pix.width; + __u32 h = fmt->fmt.pix.height; + __u32 pixfmt = fmt->fmt.pix.pixelformat; + const struct v4l2l_format *format = format_by_fourcc(pixfmt); + + if (w > dev->max_width) + w = dev->max_width; + if (h > dev->max_height) + h = dev->max_height; + + dprintk("trying image %dx%d\n", w, h); + + if (w < 1) + w = V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; + + if (h < 1) + h = V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; + + if (NULL == format) + format = &formats[0]; + + pix_format_set_size(&fmt->fmt.pix, format, w, h); + + fmt->fmt.pix.pixelformat = format->fourcc; + fmt->fmt.pix.colorspace = V4L2_COLORSPACE_SRGB; + + if (V4L2_FIELD_ANY == fmt->fmt.pix.field) + fmt->fmt.pix.field = V4L2_FIELD_NONE; + + /* FIXXME: try_fmt should never modify the device-state */ + dev->pix_format = fmt->fmt.pix; + } + return 0; +} + +/* sets new output format, if possible; + * allocate data here because we do not know if it will be streaming or + * read/write IO + * called on VIDIOC_S_FMT with v4l2_buf_type set to V4L2_BUF_TYPE_VIDEO_OUTPUT + */ +static int vidioc_s_fmt_out(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + struct v4l2_loopback_device *dev; + char buf[5]; + int ret; + MARK(); + + dev = v4l2loopback_getdevice(file); + ret = vidioc_try_fmt_out(file, priv, fmt); + + dprintk("s_fmt_out(%d) %d...%d\n", ret, dev->ready_for_capture, + dev->pix_format.sizeimage); + + buf[4] = 0; + dprintk("outFOURCC=%s\n", fourcc2str(dev->pix_format.pixelformat, buf)); + + if (ret < 0) + return ret; + + if (!dev->ready_for_capture) { + dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); + fmt->fmt.pix.sizeimage = dev->buffer_size; + allocate_buffers(dev); + } + return ret; +} + +// #define V4L2L_OVERLAY +#ifdef V4L2L_OVERLAY +/* ------------------ OVERLAY ----------------------- */ +/* currently unsupported */ +/* GSTreamer's v4l2sink is buggy, as it requires the overlay to work + * while it should only require it, if overlay is requested + * once the gstreamer element is fixed, remove the overlay dummies + */ +#warning OVERLAY dummies +static int vidioc_g_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} + +static int vidioc_s_fmt_overlay(struct file *file, void *priv, + struct v4l2_format *fmt) +{ + return 0; +} +#endif /* V4L2L_OVERLAY */ + +/* ------------------ PARAMs ----------------------- */ + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_G_PARM + */ +static int vidioc_g_parm(struct file *file, void *priv, + struct v4l2_streamparm *parm) +{ + /* do not care about type of opener, hope these enums would always be + * compatible */ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + parm->parm.capture = dev->capture_param; + return 0; +} + +/* get some data flow parameters, only capability, fps and readbuffers has + * effect on this driver + * called on VIDIOC_S_PARM + */ +static int vidioc_s_parm(struct file *file, void *priv, + struct v4l2_streamparm *parm) +{ + struct v4l2_loopback_device *dev; + int err = 0; + MARK(); + + dev = v4l2loopback_getdevice(file); + dprintk("vidioc_s_parm called frate=%d/%d\n", + parm->parm.capture.timeperframe.numerator, + parm->parm.capture.timeperframe.denominator); + + switch (parm->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + if ((err = set_timeperframe( + dev, &parm->parm.capture.timeperframe)) < 0) + return err; + break; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if ((err = set_timeperframe( + dev, &parm->parm.capture.timeperframe)) < 0) + return err; + break; + default: + return -1; + } + + parm->parm.capture = dev->capture_param; + return 0; +} + +#ifdef V4L2LOOPBACK_WITH_STD +/* sets a tv standard, actually we do not need to handle this any special way + * added to support effecttv + * called on VIDIOC_S_STD + */ +static int vidioc_s_std(struct file *file, void *fh, v4l2_std_id *_std) +{ + v4l2_std_id req_std = 0, supported_std = 0; + const v4l2_std_id all_std = V4L2_STD_ALL, no_std = 0; + + if (_std) { + req_std = *_std; + *_std = all_std; + } + + /* we support everything in V4L2_STD_ALL, but not more... */ + supported_std = (all_std & req_std); + if (no_std == supported_std) + return -EINVAL; + + return 0; +} + +/* gets a fake video standard + * called on VIDIOC_G_STD + */ +static int vidioc_g_std(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +/* gets a fake video standard + * called on VIDIOC_QUERYSTD + */ +static int vidioc_querystd(struct file *file, void *fh, v4l2_std_id *norm) +{ + if (norm) + *norm = V4L2_STD_ALL; + return 0; +} +#endif /* V4L2LOOPBACK_WITH_STD */ + +/* get ctrls info + * called on VIDIOC_QUERYCTRL + */ +static int vidioc_queryctrl(struct file *file, void *fh, + struct v4l2_queryctrl *q) +{ + const struct v4l2_ctrl_config *cnf = 0; + switch (q->id) { + case CID_KEEP_FORMAT: + cnf = &v4l2loopback_ctrl_keepformat; + break; + case CID_SUSTAIN_FRAMERATE: + cnf = &v4l2loopback_ctrl_sustainframerate; + break; + case CID_TIMEOUT: + cnf = &v4l2loopback_ctrl_timeout; + break; + case CID_TIMEOUT_IMAGE_IO: + cnf = &v4l2loopback_ctrl_timeoutimageio; + break; + default: + return -EINVAL; + } + if (!cnf) + BUG(); + + strcpy(q->name, cnf->name); + q->default_value = cnf->def; + q->type = cnf->type; + q->minimum = cnf->min; + q->maximum = cnf->max; + q->step = cnf->step; + + memset(q->reserved, 0, sizeof(q->reserved)); + return 0; +} + +static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + + switch (c->id) { + case CID_KEEP_FORMAT: + c->value = dev->keep_format; + break; + case CID_SUSTAIN_FRAMERATE: + c->value = dev->sustain_framerate; + break; + case CID_TIMEOUT: + c->value = jiffies_to_msecs(dev->timeout_jiffies); + break; + case CID_TIMEOUT_IMAGE_IO: + c->value = dev->timeout_image_io; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int v4l2loopback_set_ctrl(struct v4l2_loopback_device *dev, u32 id, + s64 val) +{ + switch (id) { + case CID_KEEP_FORMAT: + if (val < 0 || val > 1) + return -EINVAL; + dev->keep_format = val; + try_free_buffers(dev); /* will only free buffers if !keep_format */ + break; + case CID_SUSTAIN_FRAMERATE: + if (val < 0 || val > 1) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->sustain_framerate = val; + check_timers(dev); + spin_unlock_bh(&dev->lock); + break; + case CID_TIMEOUT: + if (val < 0 || val > MAX_TIMEOUT) + return -EINVAL; + spin_lock_bh(&dev->lock); + dev->timeout_jiffies = msecs_to_jiffies(val); + check_timers(dev); + spin_unlock_bh(&dev->lock); + allocate_timeout_image(dev); + break; + case CID_TIMEOUT_IMAGE_IO: + if (val < 0 || val > 1) + return -EINVAL; + dev->timeout_image_io = val; + break; + default: + return -EINVAL; + } + return 0; +} + +static int v4l2loopback_s_ctrl(struct v4l2_ctrl *ctrl) +{ + struct v4l2_loopback_device *dev = container_of( + ctrl->handler, struct v4l2_loopback_device, ctrl_handler); + return v4l2loopback_set_ctrl(dev, ctrl->id, ctrl->val); +} +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + return v4l2loopback_set_ctrl(dev, c->id, c->value); +} + +/* returns set of device outputs, in our case there is only one + * called on VIDIOC_ENUMOUTPUT + */ +static int vidioc_enum_output(struct file *file, void *fh, + struct v4l2_output *outp) +{ + __u32 index = outp->index; + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + MARK(); + + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (0 != index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(outp, 0, sizeof(*outp)); + + outp->index = index; + strlcpy(outp->name, "loopback in", sizeof(outp->name)); + outp->type = V4L2_OUTPUT_TYPE_ANALOG; + outp->audioset = 0; + outp->modulator = 0; +#ifdef V4L2LOOPBACK_WITH_STD + outp->std = V4L2_STD_ALL; +#ifdef V4L2_OUT_CAP_STD + outp->capabilities |= V4L2_OUT_CAP_STD; +#endif /* V4L2_OUT_CAP_STD */ +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which output is currently active, + * called on VIDIOC_G_OUTPUT + */ +static int vidioc_g_output(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set output, can make sense if we have more than one video src, + * called on VIDIOC_S_OUTPUT + */ +static int vidioc_s_output(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_output) + return -ENOTTY; + + if (i) + return -EINVAL; + + return 0; +} + +/* returns set of device inputs, in our case there is only one, + * but later I may add more + * called on VIDIOC_ENUMINPUT + */ +static int vidioc_enum_input(struct file *file, void *fh, + struct v4l2_input *inp) +{ + __u32 index = inp->index; + MARK(); + + if (0 != index) + return -EINVAL; + + /* clear all data (including the reserved fields) */ + memset(inp, 0, sizeof(*inp)); + + inp->index = index; + strlcpy(inp->name, "loopback", sizeof(inp->name)); + inp->type = V4L2_INPUT_TYPE_CAMERA; + inp->audioset = 0; + inp->tuner = 0; + inp->status = 0; + +#ifdef V4L2LOOPBACK_WITH_STD + inp->std = V4L2_STD_ALL; +#ifdef V4L2_IN_CAP_STD + inp->capabilities |= V4L2_IN_CAP_STD; +#endif +#endif /* V4L2LOOPBACK_WITH_STD */ + + return 0; +} + +/* which input is currently active, + * called on VIDIOC_G_INPUT + */ +static int vidioc_g_input(struct file *file, void *fh, unsigned int *i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i) + *i = 0; + return 0; +} + +/* set input, can make sense if we have more than one video src, + * called on VIDIOC_S_INPUT + */ +static int vidioc_s_input(struct file *file, void *fh, unsigned int i) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + if (!dev->announce_all_caps && !dev->ready_for_capture) + return -ENOTTY; + if (i == 0) + return 0; + return -EINVAL; +} + +/* --------------- V4L2 ioctl buffer related calls ----------------- */ + +/* negotiate buffer type + * only mmap streaming supported + * called on VIDIOC_REQBUFS + */ +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *b) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int i; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + dprintk("reqbufs: %d\t%d=%d\n", b->memory, b->count, + dev->buffers_number); + if (opener->timeout_image_io) { + if (b->memory != V4L2_MEMORY_MMAP) + return -EINVAL; + b->count = 1; + return 0; + } + + init_buffers(dev); + switch (b->memory) { + case V4L2_MEMORY_MMAP: + /* do nothing here, buffers are always allocated */ + if (b->count < 1 || dev->buffers_number < 1) + return 0; + + if (b->count > dev->buffers_number) + b->count = dev->buffers_number; + + /* make sure that outbufs_list contains buffers from 0 to used_buffers-1 + * actually, it will have been already populated via v4l2_loopback_init() + * at this point */ + if (list_empty(&dev->outbufs_list)) { + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + + /* also, if dev->used_buffers is going to be decreased, we should remove + * out-of-range buffers from outbufs_list, and fix bufpos2index mapping */ + if (b->count < dev->used_buffers) { + struct v4l2l_buffer *pos, *n; + + list_for_each_entry_safe (pos, n, &dev->outbufs_list, + list_head) { + if (pos->buffer.index >= b->count) + list_del(&pos->list_head); + } + + /* after we update dev->used_buffers, buffers in outbufs_list will + * correspond to dev->write_position + [0;b->count-1] range */ + i = dev->write_position; + list_for_each_entry (pos, &dev->outbufs_list, + list_head) { + dev->bufpos2index[i % b->count] = + pos->buffer.index; + ++i; + } + } + + opener->buffers_number = b->count; + if (opener->buffers_number < dev->used_buffers) + dev->used_buffers = opener->buffers_number; + return 0; + default: + return -EINVAL; + } +} + +/* returns buffer asked for; + * give app as many buffers as it wants, if it less than MAX, + * but map them in our inner buffers + * called on VIDIOC_QUERYBUF + */ +static int vidioc_querybuf(struct file *file, void *fh, struct v4l2_buffer *b) +{ + enum v4l2_buf_type type; + int index; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + + MARK(); + + type = b->type; + index = b->index; + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if ((b->type != V4L2_BUF_TYPE_VIDEO_CAPTURE) && + (b->type != V4L2_BUF_TYPE_VIDEO_OUTPUT)) { + return -EINVAL; + } + if (b->index > max_buffers) + return -EINVAL; + + if (opener->timeout_image_io) + *b = dev->timeout_image_buffer.buffer; + else + *b = dev->buffers[b->index % dev->used_buffers].buffer; + + b->type = type; + b->index = index; + dprintkrw("buffer type: %d (of %d with size=%ld)\n", b->memory, + dev->buffers_number, dev->buffer_size); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + b->flags &= ~V4L2_BUF_FLAG_DONE; + b->flags |= V4L2_BUF_FLAG_QUEUED; + + return 0; +} + +static void buffer_written(struct v4l2_loopback_device *dev, + struct v4l2l_buffer *buf) +{ + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + spin_lock_bh(&dev->lock); + + dev->bufpos2index[dev->write_position % dev->used_buffers] = + buf->buffer.index; + list_move_tail(&buf->list_head, &dev->outbufs_list); + ++dev->write_position; + dev->reread_count = 0; + + check_timers(dev); + spin_unlock_bh(&dev->lock); +} + +/* put buffer to queue + * called on VIDIOC_QBUF + */ +static int vidioc_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *b; + int index; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + if (buf->index > max_buffers) + return -EINVAL; + if (opener->timeout_image_io) + return 0; + + index = buf->index % dev->used_buffers; + b = &dev->buffers[index]; + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + dprintkrw("capture QBUF index: %d\n", index); + set_queued(b); + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + dprintkrw("output QBUF pos: %d index: %d\n", + dev->write_position, index); + if (buf->timestamp.tv_sec == 0 && buf->timestamp.tv_usec == 0) + v4l2l_get_timestamp(&b->buffer); + else + b->buffer.timestamp = buf->timestamp; + b->buffer.bytesused = buf->bytesused; + set_done(b); + buffer_written(dev, b); + + /* Hopefully fix 'DQBUF return bad index if queue bigger then 2 for capture' + https://github.com/umlaeute/v4l2loopback/issues/60 */ + buf->flags &= ~V4L2_BUF_FLAG_DONE; + buf->flags |= V4L2_BUF_FLAG_QUEUED; + + wake_up_all(&dev->read_event); + return 0; + default: + return -EINVAL; + } +} + +static int can_read(struct v4l2_loopback_device *dev, + struct v4l2_loopback_opener *opener) +{ + int ret; + + spin_lock_bh(&dev->lock); + check_timers(dev); + ret = dev->write_position > opener->read_position || + dev->reread_count > opener->reread_count || dev->timeout_happened; + spin_unlock_bh(&dev->lock); + return ret; +} + +static int get_capture_buffer(struct file *file) +{ + struct v4l2_loopback_device *dev = v4l2loopback_getdevice(file); + struct v4l2_loopback_opener *opener = fh_to_opener(file->private_data); + int pos, ret; + int timeout_happened; + + if ((file->f_flags & O_NONBLOCK) && + (dev->write_position <= opener->read_position && + dev->reread_count <= opener->reread_count && + !dev->timeout_happened)) + return -EAGAIN; + wait_event_interruptible(dev->read_event, can_read(dev, opener)); + + spin_lock_bh(&dev->lock); + if (dev->write_position == opener->read_position) { + if (dev->reread_count > opener->reread_count + 2) + opener->reread_count = dev->reread_count - 1; + ++opener->reread_count; + pos = (opener->read_position + dev->used_buffers - 1) % + dev->used_buffers; + } else { + opener->reread_count = 0; + if (dev->write_position > + opener->read_position + dev->used_buffers) + opener->read_position = dev->write_position - 1; + pos = opener->read_position % dev->used_buffers; + ++opener->read_position; + } + timeout_happened = dev->timeout_happened; + dev->timeout_happened = 0; + spin_unlock_bh(&dev->lock); + + ret = dev->bufpos2index[pos]; + if (timeout_happened) { + /* although allocated on-demand, timeout_image is freed only + * in free_buffers(), so we don't need to worry about it being + * deallocated suddenly */ + memcpy(dev->image + dev->buffers[ret].buffer.m.offset, + dev->timeout_image, dev->buffer_size); + } + return ret; +} + +/* put buffer to dequeue + * called on VIDIOC_DQBUF + */ +static int vidioc_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + int index; + struct v4l2l_buffer *b; + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + if (opener->timeout_image_io) { + *buf = dev->timeout_image_buffer.buffer; + return 0; + } + + switch (buf->type) { + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + index = get_capture_buffer(file); + if (index < 0) + return index; + dprintkrw("capture DQBUF pos: %d index: %d\n", + opener->read_position - 1, index); + if (!(dev->buffers[index].buffer.flags & + V4L2_BUF_FLAG_MAPPED)) { + dprintk("trying to return not mapped buf[%d]\n", index); + return -EINVAL; + } + unset_flags(&dev->buffers[index]); + *buf = dev->buffers[index].buffer; + return 0; + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + b = list_entry(dev->outbufs_list.prev, struct v4l2l_buffer, + list_head); + list_move_tail(&b->list_head, &dev->outbufs_list); + dprintkrw("output DQBUF index: %d\n", b->buffer.index); + unset_flags(b); + *buf = b->buffer; + buf->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + return 0; + default: + return -EINVAL; + } +} + +/* ------------- STREAMING ------------------- */ + +/* start streaming + * called on VIDIOC_STREAMON + */ +static int vidioc_streamon(struct file *file, void *fh, enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(fh); + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + opener->type = WRITER; + dev->ready_for_output = 0; + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + } + dev->ready_for_capture++; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + opener->type = READER; + if (!dev->ready_for_capture) + return -EIO; + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +/* stop streaming + * called on VIDIOC_STREAMOFF + */ +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type type) +{ + struct v4l2_loopback_device *dev; + MARK(); + dprintk("%d\n", type); + + dev = v4l2loopback_getdevice(file); + + switch (type) { + case V4L2_BUF_TYPE_VIDEO_OUTPUT: + if (dev->ready_for_capture > 0) + dev->ready_for_capture--; + return 0; + case V4L2_BUF_TYPE_VIDEO_CAPTURE: + return 0; + default: + return -EINVAL; + } + return -EINVAL; +} + +#ifdef CONFIG_VIDEO_V4L1_COMPAT +static int vidiocgmbuf(struct file *file, void *fh, struct video_mbuf *p) +{ + struct v4l2_loopback_device *dev; + MARK(); + + dev = v4l2loopback_getdevice(file); + p->frames = dev->buffers_number; + p->offsets[0] = 0; + p->offsets[1] = 0; + p->size = dev->buffer_size; + return 0; +} +#endif + +static int vidioc_subscribe_event(struct v4l2_fh *fh, + const struct v4l2_event_subscription *sub) +{ + switch (sub->type) { + case V4L2_EVENT_CTRL: + return v4l2_ctrl_subscribe_event(fh, sub); + } + + return -EINVAL; +} + +/* file operations */ +static void vm_open(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count++; +} + +static void vm_close(struct vm_area_struct *vma) +{ + struct v4l2l_buffer *buf; + MARK(); + + buf = vma->vm_private_data; + buf->use_count--; +} + +static struct vm_operations_struct vm_ops = { + .open = vm_open, + .close = vm_close, +}; + +static int v4l2_loopback_mmap(struct file *file, struct vm_area_struct *vma) +{ + unsigned long addr; + unsigned long start; + unsigned long size; + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + struct v4l2l_buffer *buffer = NULL; + MARK(); + + start = (unsigned long)vma->vm_start; + size = (unsigned long)(vma->vm_end - vma->vm_start); + + dev = v4l2loopback_getdevice(file); + opener = fh_to_opener(file->private_data); + + if (size > dev->buffer_size) { + dprintk("userspace tries to mmap too much, fail\n"); + return -EINVAL; + } + if (opener->timeout_image_io) { + /* we are going to map the timeout_image_buffer */ + if ((vma->vm_pgoff << PAGE_SHIFT) != + dev->buffer_size * MAX_BUFFERS) { + dprintk("invalid mmap offset for timeout_image_io mode\n"); + return -EINVAL; + } + } else if ((vma->vm_pgoff << PAGE_SHIFT) > + dev->buffer_size * (dev->buffers_number - 1)) { + dprintk("userspace tries to mmap too far, fail\n"); + return -EINVAL; + } + + /* FIXXXXXME: allocation should not happen here! */ + if (NULL == dev->image) + if (allocate_buffers(dev) < 0) + return -EINVAL; + + if (opener->timeout_image_io) { + buffer = &dev->timeout_image_buffer; + addr = (unsigned long)dev->timeout_image; + } else { + int i; + for (i = 0; i < dev->buffers_number; ++i) { + buffer = &dev->buffers[i]; + if ((buffer->buffer.m.offset >> PAGE_SHIFT) == + vma->vm_pgoff) + break; + } + + if (NULL == buffer) + return -EINVAL; + + addr = (unsigned long)dev->image + + (vma->vm_pgoff << PAGE_SHIFT); + } + + while (size > 0) { + struct page *page; + + page = (void *)vmalloc_to_page((void *)addr); + + if (vm_insert_page(vma, start, page) < 0) + return -EAGAIN; + + start += PAGE_SIZE; + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + vma->vm_ops = &vm_ops; + vma->vm_private_data = buffer; + buffer->buffer.flags |= V4L2_BUF_FLAG_MAPPED; + + vm_open(vma); + + MARK(); + return 0; +} + +static unsigned int v4l2_loopback_poll(struct file *file, + struct poll_table_struct *pts) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + __poll_t req_events = poll_requested_events(pts); + int ret_mask = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (req_events & POLLPRI) { + if (!v4l2_event_pending(&opener->fh)) + poll_wait(file, &opener->fh.wait, pts); + if (v4l2_event_pending(&opener->fh)) { + ret_mask |= POLLPRI; + if (!(req_events & DEFAULT_POLLMASK)) + return ret_mask; + } + } + + switch (opener->type) { + case WRITER: + ret_mask |= POLLOUT | POLLWRNORM; + break; + case READER: + if (!can_read(dev, opener)) { + if (ret_mask) + return ret_mask; + poll_wait(file, &dev->read_event, pts); + } + if (can_read(dev, opener)) + ret_mask |= POLLIN | POLLRDNORM; + if (v4l2_event_pending(&opener->fh)) + ret_mask |= POLLPRI; + break; + default: + break; + } + + MARK(); + return ret_mask; +} + +/* do not want to limit device opens, it can be as many readers as user want, + * writers are limited by means of setting writer field */ +static int v4l2_loopback_open(struct file *file) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_opener *opener; + MARK(); + dev = v4l2loopback_getdevice(file); + if (dev->open_count.counter >= dev->max_openers) + return -EBUSY; + /* kfree on close */ + opener = kzalloc(sizeof(*opener), GFP_KERNEL); + if (opener == NULL) + return -ENOMEM; + + v4l2_fh_init(&opener->fh, video_devdata(file)); + file->private_data = &opener->fh; + atomic_inc(&dev->open_count); + + opener->timeout_image_io = dev->timeout_image_io; + dev->timeout_image_io = 0; + + if (opener->timeout_image_io) { + int r = allocate_timeout_image(dev); + + if (r < 0) { + dprintk("timeout image allocation failed\n"); + return r; + } + } + + v4l2_fh_add(&opener->fh); + dprintk("opened dev:%p with image:%p\n", dev, dev ? dev->image : NULL); + MARK(); + return 0; +} + +static int v4l2_loopback_close(struct file *file) +{ + struct v4l2_loopback_opener *opener; + struct v4l2_loopback_device *dev; + int iswriter = 0; + MARK(); + + opener = fh_to_opener(file->private_data); + dev = v4l2loopback_getdevice(file); + + if (WRITER == opener->type) + iswriter = 1; + + atomic_dec(&dev->open_count); + if (dev->open_count.counter == 0) { + del_timer_sync(&dev->sustain_timer); + del_timer_sync(&dev->timeout_timer); + } + try_free_buffers(dev); + + v4l2_fh_del(&opener->fh); + v4l2_fh_exit(&opener->fh); + + kfree(opener); + if (iswriter) { + dev->ready_for_output = 1; + } + MARK(); + return 0; +} + +static ssize_t v4l2_loopback_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + int read_index; + struct v4l2_loopback_device *dev; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + read_index = get_capture_buffer(file); + if (read_index < 0) + return read_index; + if (count > dev->buffer_size) + count = dev->buffer_size; + b = &dev->buffers[read_index].buffer; + if (count > b->bytesused) + count = b->bytesused; + if (copy_to_user((void *)buf, (void *)(dev->image + b->m.offset), + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_to_user() in read buf\n"); + return -EFAULT; + } + dprintkrw("leave v4l2_loopback_read()\n"); + return count; +} + +static ssize_t v4l2_loopback_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct v4l2_loopback_device *dev; + int write_index; + struct v4l2_buffer *b; + MARK(); + + dev = v4l2loopback_getdevice(file); + + /* there's at least one writer, so don't stop announcing output capabilities */ + dev->ready_for_output = 0; + + if (!dev->ready_for_capture) { + int ret = allocate_buffers(dev); + if (ret < 0) + return ret; + dev->ready_for_capture = 1; + } + dprintkrw("v4l2_loopback_write() trying to write %zu bytes\n", count); + if (count > dev->buffer_size) + count = dev->buffer_size; + + write_index = dev->write_position % dev->used_buffers; + b = &dev->buffers[write_index].buffer; + + if (copy_from_user((void *)(dev->image + b->m.offset), (void *)buf, + count)) { + printk(KERN_ERR + "v4l2-loopback: failed copy_from_user() in write buf, could not write %zu\n", + count); + return -EFAULT; + } + v4l2l_get_timestamp(b); + b->bytesused = count; + b->sequence = dev->write_position; + buffer_written(dev, &dev->buffers[write_index]); + wake_up_all(&dev->read_event); + dprintkrw("leave v4l2_loopback_write()\n"); + return count; +} + +/* init functions */ +/* frees buffers, if already allocated */ +static int free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + dprintk("freeing image@%p for dev:%p\n", dev ? dev->image : NULL, dev); + if (dev->image) { + vfree(dev->image); + dev->image = NULL; + } + if (dev->timeout_image) { + vfree(dev->timeout_image); + dev->timeout_image = NULL; + } + dev->imagesize = 0; + + return 0; +} +/* frees buffers, if they are no longer needed */ +static void try_free_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + if (0 == dev->open_count.counter && !dev->keep_format) { + free_buffers(dev); + dev->ready_for_capture = 0; + dev->buffer_size = 0; + dev->write_position = 0; + } +} +/* allocates buffers, if buffer_size is set */ +static int allocate_buffers(struct v4l2_loopback_device *dev) +{ + MARK(); + /* vfree on close file operation in case no open handles left */ + if (0 == dev->buffer_size) + return -EINVAL; + + if (dev->image) { + dprintk("allocating buffers again: %ld %ld\n", + dev->buffer_size * dev->buffers_number, dev->imagesize); + /* FIXME: prevent double allocation more intelligently! */ + if (dev->buffer_size * dev->buffers_number == dev->imagesize) + return 0; + + /* if there is only one writer, no problem should occur */ + if (dev->open_count.counter == 1) + free_buffers(dev); + else + return -EINVAL; + } + + dev->imagesize = dev->buffer_size * dev->buffers_number; + + dprintk("allocating %ld = %ldx%d\n", dev->imagesize, dev->buffer_size, + dev->buffers_number); + + dev->image = vmalloc(dev->imagesize); + if (dev->timeout_jiffies > 0) + allocate_timeout_image(dev); + + if (dev->image == NULL) + return -ENOMEM; + dprintk("vmallocated %ld bytes\n", dev->imagesize); + MARK(); + init_buffers(dev); + return 0; +} + +/* init inner buffers, they are capture mode and flags are set as + * for capture mod buffers */ +static void init_buffers(struct v4l2_loopback_device *dev) +{ + int i; + int buffer_size; + int bytesused; + MARK(); + + buffer_size = dev->buffer_size; + bytesused = dev->pix_format.sizeimage; + + for (i = 0; i < dev->buffers_number; ++i) { + struct v4l2_buffer *b = &dev->buffers[i].buffer; + b->index = i; + b->bytesused = bytesused; + b->length = buffer_size; + b->field = V4L2_FIELD_NONE; + b->flags = 0; +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 6, 1) + b->input = 0; +#endif + b->m.offset = i * buffer_size; + b->memory = V4L2_MEMORY_MMAP; + b->sequence = 0; + b->timestamp.tv_sec = 0; + b->timestamp.tv_usec = 0; + b->type = V4L2_BUF_TYPE_VIDEO_CAPTURE; + + v4l2l_get_timestamp(b); + } + dev->timeout_image_buffer = dev->buffers[0]; + dev->timeout_image_buffer.buffer.m.offset = MAX_BUFFERS * buffer_size; + MARK(); +} + +static int allocate_timeout_image(struct v4l2_loopback_device *dev) +{ + MARK(); + if (dev->buffer_size <= 0) + return -EINVAL; + + if (dev->timeout_image == NULL) { + dev->timeout_image = v4l2l_vzalloc(dev->buffer_size); + if (dev->timeout_image == NULL) + return -ENOMEM; + } + return 0; +} + +/* fills and register video device */ +static void init_vdev(struct video_device *vdev, int nr) +{ + MARK(); + +#ifdef V4L2LOOPBACK_WITH_STD + vdev->tvnorms = V4L2_STD_ALL; +#endif /* V4L2LOOPBACK_WITH_STD */ + + vdev->vfl_type = VFL_TYPE_VIDEO; + vdev->fops = &v4l2_loopback_fops; + vdev->ioctl_ops = &v4l2_loopback_ioctl_ops; + vdev->release = &video_device_release; + vdev->minor = -1; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0) + vdev->device_caps = V4L2_CAP_VIDEO_CAPTURE | V4L2_CAP_VIDEO_OUTPUT | + V4L2_CAP_READWRITE | V4L2_CAP_STREAMING; +#ifdef V4L2_CAP_VIDEO_M2M + vdev->device_caps |= V4L2_CAP_VIDEO_M2M; +#endif +#endif /* >=linux-4.7.0 */ + + if (debug > 1) +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 20, 0) + vdev->debug = V4L2_DEBUG_IOCTL | V4L2_DEBUG_IOCTL_ARG; +#else + vdev->dev_debug = + V4L2_DEV_DEBUG_IOCTL | V4L2_DEV_DEBUG_IOCTL_ARG; +#endif + + /* since kernel-3.7, there is a new field 'vfl_dir' that has to be + * set to VFL_DIR_M2M for bidirectional devices */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 7, 0) + vdev->vfl_dir = VFL_DIR_M2M; +#endif + + MARK(); +} + +/* init default capture parameters, only fps may be changed in future */ +static void init_capture_param(struct v4l2_captureparm *capture_param) +{ + MARK(); + capture_param->capability = 0; + capture_param->capturemode = 0; + capture_param->extendedmode = 0; + capture_param->readbuffers = max_buffers; + capture_param->timeperframe.numerator = 1; + capture_param->timeperframe.denominator = 30; +} + +static void check_timers(struct v4l2_loopback_device *dev) +{ + if (!dev->ready_for_capture) + return; + + if (dev->timeout_jiffies > 0 && !timer_pending(&dev->timeout_timer)) + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + if (dev->sustain_framerate && !timer_pending(&dev->sustain_timer)) + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies * 3 / 2); +} +#ifdef HAVE_TIMER_SETUP +static void sustain_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, sustain_timer); +#else +static void sustain_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->sustain_framerate) { + dev->reread_count++; + dprintkrw("reread: %d %d\n", dev->write_position, + dev->reread_count); + if (dev->reread_count == 1) + mod_timer(&dev->sustain_timer, + jiffies + max(1UL, dev->frame_jiffies / 2)); + else + mod_timer(&dev->sustain_timer, + jiffies + dev->frame_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} +#ifdef HAVE_TIMER_SETUP +static void timeout_timer_clb(struct timer_list *t) +{ + struct v4l2_loopback_device *dev = from_timer(dev, t, timeout_timer); +#else +static void timeout_timer_clb(unsigned long nr) +{ + struct v4l2_loopback_device *dev = + idr_find(&v4l2loopback_index_idr, nr); +#endif + spin_lock(&dev->lock); + if (dev->timeout_jiffies > 0) { + dev->timeout_happened = 1; + mod_timer(&dev->timeout_timer, jiffies + dev->timeout_jiffies); + wake_up_all(&dev->read_event); + } + spin_unlock(&dev->lock); +} + +/* init loopback main structure */ +#define DEFAULT_FROM_CONF(confmember, default_condition, default_value) \ + ((conf) ? \ + ((conf->confmember default_condition) ? (default_value) : \ + (conf->confmember)) : \ + default_value) + +static int v4l2_loopback_add(struct v4l2_loopback_config *conf, int *ret_nr) +{ + struct v4l2_loopback_device *dev; + struct v4l2_ctrl_handler *hdl; + + int err = -ENOMEM; + + int _max_width = DEFAULT_FROM_CONF( + max_width, <= V4L2LOOPBACK_SIZE_MIN_WIDTH, max_width); + int _max_height = DEFAULT_FROM_CONF( + max_height, <= V4L2LOOPBACK_SIZE_MIN_HEIGHT, max_height); + bool _announce_all_caps = (conf && conf->announce_all_caps >= 0) ? + (conf->announce_all_caps) : + V4L2LOOPBACK_DEFAULT_EXCLUSIVECAPS; + + int _max_buffers = DEFAULT_FROM_CONF(max_buffers, <= 0, max_buffers); + int _max_openers = DEFAULT_FROM_CONF(max_openers, <= 0, max_openers); + + int nr = -1; + if (conf) { + if (conf->capture_nr >= 0 && + conf->output_nr == conf->capture_nr) { + nr = conf->capture_nr; + } else if (conf->capture_nr < 0 && conf->output_nr < 0) { + nr = -1; + } else if (conf->capture_nr < 0) { + nr = conf->output_nr; + } else if (conf->output_nr < 0) { + nr = conf->capture_nr; + } else { + printk(KERN_ERR + "split OUTPUT and CAPTURE devices not yet supported."); + printk(KERN_INFO + "both devices must have the same number (%d != %d).", + conf->output_nr, conf->capture_nr); + return -EINVAL; + } + } + + if (idr_find(&v4l2loopback_index_idr, nr)) + return -EEXIST; + + dprintk("creating v4l2loopback-device #%d\n", nr); + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + /* allocate id, if @id >= 0, we're requesting that specific id */ + if (nr >= 0) { + err = idr_alloc(&v4l2loopback_index_idr, dev, nr, nr + 1, + GFP_KERNEL); + if (err == -ENOSPC) + err = -EEXIST; + } else { + err = idr_alloc(&v4l2loopback_index_idr, dev, 0, 0, GFP_KERNEL); + } + if (err < 0) + goto out_free_dev; + nr = err; + err = -ENOMEM; + + if (conf && conf->card_label && *(conf->card_label)) { + snprintf(dev->card_label, sizeof(dev->card_label), "%s", + conf->card_label); + } else { + snprintf(dev->card_label, sizeof(dev->card_label), + "Dummy video device (0x%04X)", nr); + } + snprintf(dev->v4l2_dev.name, sizeof(dev->v4l2_dev.name), + "v4l2loopback-%03d", nr); + + err = v4l2_device_register(NULL, &dev->v4l2_dev); + if (err) + goto out_free_idr; + MARK(); + + dev->vdev = video_device_alloc(); + if (dev->vdev == NULL) { + err = -ENOMEM; + goto out_unregister; + } + video_set_drvdata(dev->vdev, + kzalloc(sizeof(struct v4l2loopback_private), + GFP_KERNEL)); + if (video_get_drvdata(dev->vdev) == NULL) { + err = -ENOMEM; + goto out_unregister; + } + + MARK(); + snprintf(dev->vdev->name, sizeof(dev->vdev->name), dev->card_label); + + ((struct v4l2loopback_private *)video_get_drvdata(dev->vdev)) + ->device_nr = nr; + + init_vdev(dev->vdev, nr); + dev->vdev->v4l2_dev = &dev->v4l2_dev; + init_capture_param(&dev->capture_param); + set_timeperframe(dev, &dev->capture_param.timeperframe); + dev->keep_format = 0; + dev->sustain_framerate = 0; + + dev->announce_all_caps = _announce_all_caps; + dev->max_width = _max_width; + dev->max_height = _max_height; + dev->max_openers = _max_openers; + dev->buffers_number = dev->used_buffers = _max_buffers; + + dev->write_position = 0; + + MARK(); + spin_lock_init(&dev->lock); + INIT_LIST_HEAD(&dev->outbufs_list); + if (list_empty(&dev->outbufs_list)) { + int i; + + for (i = 0; i < dev->used_buffers; ++i) + list_add_tail(&dev->buffers[i].list_head, + &dev->outbufs_list); + } + memset(dev->bufpos2index, 0, sizeof(dev->bufpos2index)); + atomic_set(&dev->open_count, 0); + dev->ready_for_capture = 0; + dev->ready_for_output = 1; + + dev->buffer_size = 0; + dev->image = NULL; + dev->imagesize = 0; +#ifdef HAVE_TIMER_SETUP + timer_setup(&dev->sustain_timer, sustain_timer_clb, 0); + timer_setup(&dev->timeout_timer, timeout_timer_clb, 0); +#else + setup_timer(&dev->sustain_timer, sustain_timer_clb, nr); + setup_timer(&dev->timeout_timer, timeout_timer_clb, nr); +#endif + dev->reread_count = 0; + dev->timeout_jiffies = 0; + dev->timeout_image = NULL; + dev->timeout_happened = 0; + + hdl = &dev->ctrl_handler; + err = v4l2_ctrl_handler_init(hdl, 4); + if (err) + goto out_unregister; + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_keepformat, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_sustainframerate, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeout, NULL); + v4l2_ctrl_new_custom(hdl, &v4l2loopback_ctrl_timeoutimageio, NULL); + if (hdl->error) { + err = hdl->error; + goto out_free_handler; + } + dev->v4l2_dev.ctrl_handler = hdl; + + err = v4l2_ctrl_handler_setup(hdl); + + /* FIXME set buffers to 0 */ + + /* Set initial format */ + dev->pix_format.width = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_WIDTH; */ + dev->pix_format.height = 0; /* V4L2LOOPBACK_SIZE_DEFAULT_HEIGHT; */ + dev->pix_format.pixelformat = formats[0].fourcc; + dev->pix_format.colorspace = + V4L2_COLORSPACE_SRGB; /* do we need to set this ? */ + dev->pix_format.field = V4L2_FIELD_NONE; + + dev->buffer_size = PAGE_ALIGN(dev->pix_format.sizeimage); + dprintk("buffer_size = %ld (=%d)\n", dev->buffer_size, + dev->pix_format.sizeimage); + allocate_buffers(dev); + + init_waitqueue_head(&dev->read_event); + + /* register the device -> it creates /dev/video* */ + if (video_register_device(dev->vdev, VFL_TYPE_VIDEO, nr) < 0) { + printk(KERN_ERR + "v4l2loopback: failed video_register_device()\n"); + err = -EFAULT; + goto out_free_device; + } + v4l2loopback_create_sysfs(dev->vdev); + + MARK(); + if (ret_nr) + *ret_nr = dev->vdev->num; + return 0; + +out_free_device: + video_device_release(dev->vdev); +out_free_handler: + v4l2_ctrl_handler_free(&dev->ctrl_handler); +out_unregister: + v4l2_device_unregister(&dev->v4l2_dev); +out_free_idr: + idr_remove(&v4l2loopback_index_idr, nr); +out_free_dev: + kfree(dev); + return err; +} + +static void v4l2_loopback_remove(struct v4l2_loopback_device *dev) +{ + free_buffers(dev); + v4l2loopback_remove_sysfs(dev->vdev); + kfree(video_get_drvdata(dev->vdev)); + video_unregister_device(dev->vdev); + v4l2_device_unregister(&dev->v4l2_dev); + v4l2_ctrl_handler_free(&dev->ctrl_handler); + kfree(dev); +} + +static long v4l2loopback_control_ioctl(struct file *file, unsigned int cmd, + unsigned long parm) +{ + struct v4l2_loopback_device *dev; + struct v4l2_loopback_config conf; + struct v4l2_loopback_config *confptr = &conf; + int device_nr; + int ret; + + ret = mutex_lock_killable(&v4l2loopback_ctl_mutex); + if (ret) + return ret; + + ret = -EINVAL; + switch (cmd) { + default: + ret = -ENOSYS; + break; + /* add a v4l2loopback device (pair), based on the user-provided specs */ + case V4L2LOOPBACK_CTL_ADD: + if (parm) { + if ((ret = copy_from_user(&conf, (void *)parm, + sizeof(conf))) < 0) + break; + } else + confptr = NULL; + ret = v4l2_loopback_add(confptr, &device_nr); + if (ret >= 0) + ret = device_nr; + break; + /* remove a v4l2loopback device (both capture and output) */ + case V4L2LOOPBACK_CTL_REMOVE: + ret = v4l2loopback_lookup((int)parm, &dev); + if (ret >= 0 && dev) { + int nr = ret; + ret = -EBUSY; + if (dev->open_count.counter > 0) + break; + idr_remove(&v4l2loopback_index_idr, nr); + v4l2_loopback_remove(dev); + ret = 0; + }; + break; + /* get information for a loopback device. + * this is mostly about limits (which cannot be queried directly with VIDIOC_G_FMT and friends + */ + case V4L2LOOPBACK_CTL_QUERY: + if (!parm) + break; + if ((ret = copy_from_user(&conf, (void *)parm, sizeof(conf))) < + 0) + break; + device_nr = + (conf.output_nr < 0) ? conf.capture_nr : conf.output_nr; + MARK(); + /* get the device from either capture_nr or output_nr (whatever is valid) */ + if ((ret = v4l2loopback_lookup(device_nr, &dev)) < 0) + break; + MARK(); + /* if we got the device from output_nr and there is a valid capture_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != conf.capture_nr) && (conf.capture_nr >= 0) && + (ret != v4l2loopback_lookup(conf.capture_nr, 0))) + break; + MARK(); + /* if otoh, we got the device from capture_nr and there is a valid output_nr, + * make sure that both refer to the same device (or bail out) + */ + if ((device_nr != conf.output_nr) && (conf.output_nr >= 0) && + (ret != v4l2loopback_lookup(conf.output_nr, 0))) + break; + MARK(); + + /* v4l2_loopback_config identified a single device, so fetch the data */ + snprintf(conf.card_label, sizeof(conf.card_label), "%s", + dev->card_label); + MARK(); + conf.output_nr = conf.capture_nr = dev->vdev->num; + conf.max_width = dev->max_width; + conf.max_height = dev->max_height; + conf.announce_all_caps = dev->announce_all_caps; + conf.max_buffers = dev->buffers_number; + conf.max_openers = dev->max_openers; + conf.debug = debug; + MARK(); + if (copy_to_user((void *)parm, &conf, sizeof(conf))) { + ret = -EFAULT; + break; + } + MARK(); + ret = 0; + ; + break; + } + + MARK(); + mutex_unlock(&v4l2loopback_ctl_mutex); + MARK(); + return ret; +} + +/* LINUX KERNEL */ + +static const struct file_operations v4l2loopback_ctl_fops = { + // clang-format off + .open = nonseekable_open, + .unlocked_ioctl = v4l2loopback_control_ioctl, + .compat_ioctl = v4l2loopback_control_ioctl, + .owner = THIS_MODULE, + .llseek = noop_llseek, + // clang-format on +}; + +static struct miscdevice v4l2loopback_misc = { + // clang-format off + .minor = MISC_DYNAMIC_MINOR, + .name = "v4l2loopback", + .fops = &v4l2loopback_ctl_fops, + // clang-format on +}; + +static const struct v4l2_file_operations v4l2_loopback_fops = { + // clang-format off + .owner = THIS_MODULE, + .open = v4l2_loopback_open, + .release = v4l2_loopback_close, + .read = v4l2_loopback_read, + .write = v4l2_loopback_write, + .poll = v4l2_loopback_poll, + .mmap = v4l2_loopback_mmap, + .unlocked_ioctl = video_ioctl2, + // clang-format on +}; + +static const struct v4l2_ioctl_ops v4l2_loopback_ioctl_ops = { + // clang-format off + .vidioc_querycap = &vidioc_querycap, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 29) + .vidioc_enum_framesizes = &vidioc_enum_framesizes, + .vidioc_enum_frameintervals = &vidioc_enum_frameintervals, +#endif + +#ifndef HAVE__V4L2_CTRLS + .vidioc_queryctrl = &vidioc_queryctrl, + .vidioc_g_ctrl = &vidioc_g_ctrl, + .vidioc_s_ctrl = &vidioc_s_ctrl, +#endif /* HAVE__V4L2_CTRLS */ + + .vidioc_enum_output = &vidioc_enum_output, + .vidioc_g_output = &vidioc_g_output, + .vidioc_s_output = &vidioc_s_output, + + .vidioc_enum_input = &vidioc_enum_input, + .vidioc_g_input = &vidioc_g_input, + .vidioc_s_input = &vidioc_s_input, + + .vidioc_enum_fmt_vid_cap = &vidioc_enum_fmt_cap, + .vidioc_g_fmt_vid_cap = &vidioc_g_fmt_cap, + .vidioc_s_fmt_vid_cap = &vidioc_s_fmt_cap, + .vidioc_try_fmt_vid_cap = &vidioc_try_fmt_cap, + + .vidioc_enum_fmt_vid_out = &vidioc_enum_fmt_out, + .vidioc_s_fmt_vid_out = &vidioc_s_fmt_out, + .vidioc_g_fmt_vid_out = &vidioc_g_fmt_out, + .vidioc_try_fmt_vid_out = &vidioc_try_fmt_out, + +#ifdef V4L2L_OVERLAY + .vidioc_s_fmt_vid_overlay = &vidioc_s_fmt_overlay, + .vidioc_g_fmt_vid_overlay = &vidioc_g_fmt_overlay, +#endif + +#ifdef V4L2LOOPBACK_WITH_STD + .vidioc_s_std = &vidioc_s_std, + .vidioc_g_std = &vidioc_g_std, + .vidioc_querystd = &vidioc_querystd, +#endif /* V4L2LOOPBACK_WITH_STD */ + + .vidioc_g_parm = &vidioc_g_parm, + .vidioc_s_parm = &vidioc_s_parm, + + .vidioc_reqbufs = &vidioc_reqbufs, + .vidioc_querybuf = &vidioc_querybuf, + .vidioc_qbuf = &vidioc_qbuf, + .vidioc_dqbuf = &vidioc_dqbuf, + + .vidioc_streamon = &vidioc_streamon, + .vidioc_streamoff = &vidioc_streamoff, + +#ifdef CONFIG_VIDEO_V4L1_COMPAT + .vidiocgmbuf = &vidiocgmbuf, +#endif + + .vidioc_subscribe_event = &vidioc_subscribe_event, + .vidioc_unsubscribe_event = &v4l2_event_unsubscribe, + // clang-format on +}; + +static int free_device_cb(int id, void *ptr, void *data) +{ + struct v4l2_loopback_device *dev = ptr; + v4l2_loopback_remove(dev); + return 0; +} +static void free_devices(void) +{ + idr_for_each(&v4l2loopback_index_idr, &free_device_cb, NULL); + idr_destroy(&v4l2loopback_index_idr); +} + +static int __init v4l2loopback_init_module(void) +{ + int err; + int i; + MARK(); + + err = misc_register(&v4l2loopback_misc); + if (err < 0) + return err; + + if (devices < 0) { + devices = 1; + + /* try guessing the devices from the "video_nr" parameter */ + for (i = MAX_DEVICES - 1; i >= 0; i--) { + if (video_nr[i] >= 0) { + devices = i + 1; + break; + } + } + } + + if (devices > MAX_DEVICES) { + devices = MAX_DEVICES; + printk(KERN_INFO + "v4l2loopback: number of initial devices is limited to: %d\n", + MAX_DEVICES); + } + + if (max_buffers > MAX_BUFFERS) { + max_buffers = MAX_BUFFERS; + printk(KERN_INFO + "v4l2loopback: number of buffers is limited to: %d\n", + MAX_BUFFERS); + } + + if (max_openers < 0) { + printk(KERN_INFO + "v4l2loopback: allowing %d openers rather than %d\n", + 2, max_openers); + max_openers = 2; + } + + if (max_width < 1) { + max_width = V4L2LOOPBACK_SIZE_DEFAULT_MAX_WIDTH; + printk(KERN_INFO "v4l2loopback: using max_width %d\n", + max_width); + } + if (max_height < 1) { + max_height = V4L2LOOPBACK_SIZE_DEFAULT_MAX_HEIGHT; + printk(KERN_INFO "v4l2loopback: using max_height %d\n", + max_height); + } + + /* kfree on module release */ + for (i = 0; i < devices; i++) { + struct v4l2_loopback_config cfg = { + // clang-format off + .output_nr = video_nr[i], + .capture_nr = video_nr[i], + .max_width = max_width, + .max_height = max_height, + .announce_all_caps = (!exclusive_caps[i]), + .max_buffers = max_buffers, + .max_openers = max_openers, + .debug = debug, + // clang-format on + }; + cfg.card_label[0] = 0; + if (card_label[i]) + snprintf(cfg.card_label, sizeof(cfg.card_label), "%s", + card_label[i]); + err = v4l2_loopback_add(&cfg, 0); + if (err) { + free_devices(); + goto error; + } + } + + dprintk("module installed\n"); + + printk(KERN_INFO "v4l2loopback driver version %d.%d.%d loaded\n", + // clang-format off + (V4L2LOOPBACK_VERSION_CODE >> 16) & 0xff, + (V4L2LOOPBACK_VERSION_CODE >> 8) & 0xff, + (V4L2LOOPBACK_VERSION_CODE ) & 0xff); + // clang-format on + + return 0; +error: + misc_deregister(&v4l2loopback_misc); + return err; +} + +#ifdef MODULE +static void v4l2loopback_cleanup_module(void) +{ + MARK(); + /* unregister the device -> it deletes /dev/video* */ + free_devices(); + /* and get rid of /dev/v4l2loopback */ + misc_deregister(&v4l2loopback_misc); + dprintk("module removed\n"); +} +#endif + +MODULE_ALIAS_MISCDEV(MISC_DYNAMIC_MINOR); +MODULE_ALIAS("devname:v4l2loopback"); + +#ifdef MODULE +int __init init_module(void) +{ + return v4l2loopback_init_module(); +} +void __exit cleanup_module(void) +{ + return v4l2loopback_cleanup_module(); +} +#else +late_initcall(v4l2loopback_init_module); +#endif + +/* + * fake usage of unused functions + */ +#ifdef HAVE__V4L2_CTRLS +static int vidioc_queryctrl(struct file *file, void *fh, + struct v4l2_queryctrl *q) __attribute__((unused)); +static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c) + __attribute__((unused)); +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c) + __attribute__((unused)); +#endif /* HAVE__V4L2_CTRLS */ diff --git a/drivers/media/v4l2-core/v4l2loopback.h b/drivers/media/v4l2-core/v4l2loopback.h new file mode 100644 index 000000000000..77c671971ef2 --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback.h @@ -0,0 +1,92 @@ +/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ +/* + * v4l2loopback.h + * + * Written by IOhannes m zmölnig, 7/1/20. + * + * Copyright 2020 by IOhannes m zmölnig. Redistribution of this file is + * permitted under the GNU General Public License. + */ +#ifndef _V4L2LOOPBACK_H +#define _V4L2LOOPBACK_H + +#define V4L2LOOPBACK_VERSION_MAJOR 0 +#define V4L2LOOPBACK_VERSION_MINOR 12 +#define V4L2LOOPBACK_VERSION_BUGFIX 5 + +/* /dev/v4l2loopback interface */ + +struct v4l2_loopback_config { + /** + * the device-number (/dev/video) + * V4L2LOOPBACK_CTL_ADD: + * setting this to a value<0, will allocate an available one + * if nr>=0 and the device already exists, the ioctl will EEXIST + * if output_nr and capture_nr are the same, only a single device will be created + * + * V4L2LOOPBACK_CTL_QUERY: + * either both output_nr and capture_nr must refer to the same loopback, + * or one (and only one) of them must be -1 + * + */ + int output_nr; + int capture_nr; + + /** + * a nice name for your device + * if (*card_label)==0, an automatic name is assigned + */ + char card_label[32]; + + /** + * maximum allowed frame size + * if too low, default values are used + */ + int max_width; + int max_height; + + /** + * whether to announce OUTPUT/CAPTURE capabilities exclusively + * for this device or not + * (!exclusive_caps) + * FIXXME: this ought to be removed (if superseded by output_nr vs capture_nr) + */ + int announce_all_caps; + + /** + * number of buffers to allocate for the queue + * if set to <=0, default values are used + */ + int max_buffers; + + /** + * how many consumers are allowed to open this device concurrently + * if set to <=0, default values are used + */ + int max_openers; + + /** + * set the debugging level for this device + */ + int debug; +}; + +/* a pointer to a (struct v4l2_loopback_config) that has all values you wish to impose on the + * to-be-created device set. + * if the ptr is NULL, a new device is created with default values at the driver's discretion. + * + * returns the device_nr of the OUTPUT device (which can be used with V4L2LOOPBACK_CTL_QUERY, + * to get more information on the device) + */ +#define V4L2LOOPBACK_CTL_ADD 0x4C80 + +/* a pointer to a (struct v4l2_loopback_config) that has output_nr and/or capture_nr set + * (the two values must either refer to video-devices associated with the same loopback device + * or exactly one of them must be <0 + */ +#define V4L2LOOPBACK_CTL_QUERY 0x4C82 + +/* the device-number (either CAPTURE or OUTPUT) associated with the loopback-device */ +#define V4L2LOOPBACK_CTL_REMOVE 0x4C81 + +#endif /* _V4L2LOOPBACK_H */ diff --git a/drivers/media/v4l2-core/v4l2loopback_formats.h b/drivers/media/v4l2-core/v4l2loopback_formats.h new file mode 100644 index 000000000000..6cb6f842cd1a --- /dev/null +++ b/drivers/media/v4l2-core/v4l2loopback_formats.h @@ -0,0 +1,427 @@ +#ifndef V4L2_PIX_FMT_VP9 +#define V4L2_PIX_FMT_VP9 v4l2_fourcc('V', 'P', '9', '0') +#endif +#ifndef V4L2_PIX_FMT_HEVC +#define V4L2_PIX_FMT_HEVC v4l2_fourcc('H', 'E', 'V', 'C') +#endif + +/* here come the packed formats */ +{ + .name = "32 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR32, + .depth = 32, + .flags = 0, +}, + { + .name = "32 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB32, + .depth = 32, + .flags = 0, + }, + { + .name = "24 bpp RGB, le", + .fourcc = V4L2_PIX_FMT_BGR24, + .depth = 24, + .flags = 0, + }, + { + .name = "24 bpp RGB, be", + .fourcc = V4L2_PIX_FMT_RGB24, + .depth = 24, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_RGB332 + { + .name = "8 bpp RGB-3-3-2", + .fourcc = V4L2_PIX_FMT_RGB332, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB332 */ +#ifdef V4L2_PIX_FMT_RGB444 + { + .name = "16 bpp RGB (xxxxrrrr ggggbbbb)", + .fourcc = V4L2_PIX_FMT_RGB444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB444 */ +#ifdef V4L2_PIX_FMT_RGB555 + { + .name = "16 bpp RGB-5-5-5", + .fourcc = V4L2_PIX_FMT_RGB555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555 */ +#ifdef V4L2_PIX_FMT_RGB565 + { + .name = "16 bpp RGB-5-6-5", + .fourcc = V4L2_PIX_FMT_RGB565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565 */ +#ifdef V4L2_PIX_FMT_RGB555X + { + .name = "16 bpp RGB-5-5-5 BE", + .fourcc = V4L2_PIX_FMT_RGB555X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB555X */ +#ifdef V4L2_PIX_FMT_RGB565X + { + .name = "16 bpp RGB-5-6-5 BE", + .fourcc = V4L2_PIX_FMT_RGB565X, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_RGB565X */ +#ifdef V4L2_PIX_FMT_BGR666 + { + .name = "18 bpp BGR-6-6-6", + .fourcc = V4L2_PIX_FMT_BGR666, + .depth = 18, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_BGR666 */ + { + .name = "4:2:2, packed, YUYV", + .fourcc = V4L2_PIX_FMT_YUYV, + .depth = 16, + .flags = 0, + }, + { + .name = "4:2:2, packed, UYVY", + .fourcc = V4L2_PIX_FMT_UYVY, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YVYU + { + .name = "4:2:2, packed YVYU", + .fourcc = V4L2_PIX_FMT_YVYU, + .depth = 16, + .flags = 0, + }, +#endif +#ifdef V4L2_PIX_FMT_VYUY + { + .name = "4:2:2, packed VYUY", + .fourcc = V4L2_PIX_FMT_VYUY, + .depth = 16, + .flags = 0, + }, +#endif + { + .name = "4:2:2, packed YYUV", + .fourcc = V4L2_PIX_FMT_YYUV, + .depth = 16, + .flags = 0, + }, + { + .name = "YUV-8-8-8-8", + .fourcc = V4L2_PIX_FMT_YUV32, + .depth = 32, + .flags = 0, + }, + { + .name = "8 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_GREY, + .depth = 8, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_Y4 + { + .name = "4 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y4, + .depth = 4, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y4 */ +#ifdef V4L2_PIX_FMT_Y6 + { + .name = "6 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y6, + .depth = 6, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y6 */ +#ifdef V4L2_PIX_FMT_Y10 + { + .name = "10 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y10, + .depth = 10, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y10 */ +#ifdef V4L2_PIX_FMT_Y12 + { + .name = "12 bpp Greyscale", + .fourcc = V4L2_PIX_FMT_Y12, + .depth = 12, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_Y12 */ + { + .name = "16 bpp, Greyscale", + .fourcc = V4L2_PIX_FMT_Y16, + .depth = 16, + .flags = 0, + }, +#ifdef V4L2_PIX_FMT_YUV444 + { + .name = "16 bpp xxxxyyyy uuuuvvvv", + .fourcc = V4L2_PIX_FMT_YUV444, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV444 */ +#ifdef V4L2_PIX_FMT_YUV555 + { + .name = "16 bpp YUV-5-5-5", + .fourcc = V4L2_PIX_FMT_YUV555, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV555 */ +#ifdef V4L2_PIX_FMT_YUV565 + { + .name = "16 bpp YUV-5-6-5", + .fourcc = V4L2_PIX_FMT_YUV565, + .depth = 16, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_YUV565 */ + +/* bayer formats */ +#ifdef V4L2_PIX_FMT_SRGGB8 + { + .name = "Bayer RGGB 8bit", + .fourcc = V4L2_PIX_FMT_SRGGB8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SRGGB8 */ +#ifdef V4L2_PIX_FMT_SGRBG8 + { + .name = "Bayer GRBG 8bit", + .fourcc = V4L2_PIX_FMT_SGRBG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGRBG8 */ +#ifdef V4L2_PIX_FMT_SGBRG8 + { + .name = "Bayer GBRG 8bit", + .fourcc = V4L2_PIX_FMT_SGBRG8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SGBRG8 */ +#ifdef V4L2_PIX_FMT_SBGGR8 + { + .name = "Bayer BA81 8bit", + .fourcc = V4L2_PIX_FMT_SBGGR8, + .depth = 8, + .flags = 0, + }, +#endif /* V4L2_PIX_FMT_SBGGR8 */ + + /* here come the planar formats */ + { + .name = "4:1:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cr-Cb", + .fourcc = V4L2_PIX_FMT_YVU420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:1:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV410, + .depth = 9, + .flags = FORMAT_FLAGS_PLANAR, + }, + { + .name = "4:2:0, planar, Y-Cb-Cr", + .fourcc = V4L2_PIX_FMT_YUV420, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#ifdef V4L2_PIX_FMT_YUV422P + { + .name = "16 bpp YVU422 planar", + .fourcc = V4L2_PIX_FMT_YUV422P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV422P */ +#ifdef V4L2_PIX_FMT_YUV411P + { + .name = "16 bpp YVU411 planar", + .fourcc = V4L2_PIX_FMT_YUV411P, + .depth = 16, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_YUV411P */ +#ifdef V4L2_PIX_FMT_Y41P + { + .name = "12 bpp YUV 4:1:1", + .fourcc = V4L2_PIX_FMT_Y41P, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_Y41P */ +#ifdef V4L2_PIX_FMT_NV12 + { + .name = "12 bpp Y/CbCr 4:2:0 ", + .fourcc = V4L2_PIX_FMT_NV12, + .depth = 12, + .flags = FORMAT_FLAGS_PLANAR, + }, +#endif /* V4L2_PIX_FMT_NV12 */ + +/* here come the compressed formats */ + +#ifdef V4L2_PIX_FMT_MJPEG + { + .name = "Motion-JPEG", + .fourcc = V4L2_PIX_FMT_MJPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MJPEG */ +#ifdef V4L2_PIX_FMT_JPEG + { + .name = "JFIF JPEG", + .fourcc = V4L2_PIX_FMT_JPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_JPEG */ +#ifdef V4L2_PIX_FMT_DV + { + .name = "DV1394", + .fourcc = V4L2_PIX_FMT_DV, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_DV */ +#ifdef V4L2_PIX_FMT_MPEG + { + .name = "MPEG-1/2/4 Multiplexed", + .fourcc = V4L2_PIX_FMT_MPEG, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG */ +#ifdef V4L2_PIX_FMT_H264 + { + .name = "H264 with start codes", + .fourcc = V4L2_PIX_FMT_H264, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264 */ +#ifdef V4L2_PIX_FMT_H264_NO_SC + { + .name = "H264 without start codes", + .fourcc = V4L2_PIX_FMT_H264_NO_SC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_NO_SC */ +#ifdef V4L2_PIX_FMT_H264_MVC + { + .name = "H264 MVC", + .fourcc = V4L2_PIX_FMT_H264_MVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H264_MVC */ +#ifdef V4L2_PIX_FMT_H263 + { + .name = "H263", + .fourcc = V4L2_PIX_FMT_H263, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_H263 */ +#ifdef V4L2_PIX_FMT_MPEG1 + { + .name = "MPEG-1 ES", + .fourcc = V4L2_PIX_FMT_MPEG1, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG1 */ +#ifdef V4L2_PIX_FMT_MPEG2 + { + .name = "MPEG-2 ES", + .fourcc = V4L2_PIX_FMT_MPEG2, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG2 */ +#ifdef V4L2_PIX_FMT_MPEG4 + { + .name = "MPEG-4 part 2 ES", + .fourcc = V4L2_PIX_FMT_MPEG4, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_MPEG4 */ +#ifdef V4L2_PIX_FMT_XVID + { + .name = "Xvid", + .fourcc = V4L2_PIX_FMT_XVID, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_XVID */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_G + { + .name = "SMPTE 421M Annex G compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_G, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_G */ +#ifdef V4L2_PIX_FMT_VC1_ANNEX_L + { + .name = "SMPTE 421M Annex L compliant stream", + .fourcc = V4L2_PIX_FMT_VC1_ANNEX_L, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VC1_ANNEX_L */ +#ifdef V4L2_PIX_FMT_VP8 + { + .name = "VP8", + .fourcc = V4L2_PIX_FMT_VP8, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP8 */ +#ifdef V4L2_PIX_FMT_VP9 + { + .name = "VP9", + .fourcc = V4L2_PIX_FMT_VP9, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_VP9 */ +#ifdef V4L2_PIX_FMT_HEVC + { + .name = "HEVC", + .fourcc = V4L2_PIX_FMT_HEVC, + .depth = 32, + .flags = FORMAT_FLAGS_COMPRESSED, + }, +#endif /* V4L2_PIX_FMT_HEVC */ diff --git a/drivers/memory/mtk-smi.c b/drivers/memory/mtk-smi.c index ac350f8d1e20..82d09b88240e 100644 --- a/drivers/memory/mtk-smi.c +++ b/drivers/memory/mtk-smi.c @@ -130,7 +130,7 @@ static void mtk_smi_clk_disable(const struct mtk_smi *smi) int mtk_smi_larb_get(struct device *larbdev) { - int ret = pm_runtime_get_sync(larbdev); + int ret = pm_runtime_resume_and_get(larbdev); return (ret < 0) ? ret : 0; } @@ -374,7 +374,7 @@ static int __maybe_unused mtk_smi_larb_resume(struct device *dev) int ret; /* Power on smi-common. */ - ret = pm_runtime_get_sync(larb->smi_common_dev); + ret = pm_runtime_resume_and_get(larb->smi_common_dev); if (ret < 0) { dev_err(dev, "Failed to pm get for smi-common(%d).\n", ret); return ret; diff --git a/drivers/memory/ti-aemif.c b/drivers/memory/ti-aemif.c index 159a16f5e7d6..51d20c2ccb75 100644 --- a/drivers/memory/ti-aemif.c +++ b/drivers/memory/ti-aemif.c @@ -378,8 +378,10 @@ static int aemif_probe(struct platform_device *pdev) */ for_each_available_child_of_node(np, child_np) { ret = of_aemif_parse_abus_config(pdev, child_np); - if (ret < 0) + if (ret < 0) { + of_node_put(child_np); goto error; + } } } else if (pdata && pdata->num_abus_data > 0) { for (i = 0; i < pdata->num_abus_data; i++, aemif->num_cs++) { @@ -405,8 +407,10 @@ static int aemif_probe(struct platform_device *pdev) for_each_available_child_of_node(np, child_np) { ret = of_platform_populate(child_np, NULL, dev_lookup, dev); - if (ret < 0) + if (ret < 0) { + of_node_put(child_np); goto error; + } } } else if (pdata) { for (i = 0; i < pdata->num_sub_devices; i++) { diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c index 193a96c8b1ea..20cb294c7512 100644 --- a/drivers/mfd/altera-sysmgr.c +++ b/drivers/mfd/altera-sysmgr.c @@ -145,7 +145,8 @@ static int sysmgr_probe(struct platform_device *pdev) sysmgr_config.reg_write = s10_protected_reg_write; /* Need physical address for SMCC call */ - regmap = devm_regmap_init(dev, NULL, (void *)res->start, + regmap = devm_regmap_init(dev, NULL, + (void *)(uintptr_t)res->start, &sysmgr_config); } else { base = devm_ioremap(dev, res->start, resource_size(res)); diff --git a/drivers/mfd/bd9571mwv.c b/drivers/mfd/bd9571mwv.c index fab3cdc27ed6..19d57a45134c 100644 --- a/drivers/mfd/bd9571mwv.c +++ b/drivers/mfd/bd9571mwv.c @@ -185,9 +185,9 @@ static int bd9571mwv_probe(struct i2c_client *client, return ret; } - ret = mfd_add_devices(bd->dev, PLATFORM_DEVID_AUTO, bd9571mwv_cells, - ARRAY_SIZE(bd9571mwv_cells), NULL, 0, - regmap_irq_get_domain(bd->irq_data)); + ret = devm_mfd_add_devices(bd->dev, PLATFORM_DEVID_AUTO, + bd9571mwv_cells, ARRAY_SIZE(bd9571mwv_cells), + NULL, 0, regmap_irq_get_domain(bd->irq_data)); if (ret) { regmap_del_irq_chip(bd->irq, bd->irq_data); return ret; diff --git a/drivers/mfd/gateworks-gsc.c b/drivers/mfd/gateworks-gsc.c index 576da62fbb0c..d87876747b91 100644 --- a/drivers/mfd/gateworks-gsc.c +++ b/drivers/mfd/gateworks-gsc.c @@ -234,7 +234,7 @@ static int gsc_probe(struct i2c_client *client) ret = devm_regmap_add_irq_chip(dev, gsc->regmap, client->irq, IRQF_ONESHOT | IRQF_SHARED | - IRQF_TRIGGER_FALLING, 0, + IRQF_TRIGGER_LOW, 0, &gsc_irq_chip, &irq_data); if (ret) return ret; diff --git a/drivers/mfd/intel_quark_i2c_gpio.c b/drivers/mfd/intel_quark_i2c_gpio.c index fe8ca945f367..b67cb0a3ab05 100644 --- a/drivers/mfd/intel_quark_i2c_gpio.c +++ b/drivers/mfd/intel_quark_i2c_gpio.c @@ -72,7 +72,8 @@ static const struct dmi_system_id dmi_platform_info[] = { {} }; -static const struct resource intel_quark_i2c_res[] = { +/* This is used as a place holder and will be modified at run-time */ +static struct resource intel_quark_i2c_res[] = { [INTEL_QUARK_IORES_MEM] = { .flags = IORESOURCE_MEM, }, @@ -85,7 +86,8 @@ static struct mfd_cell_acpi_match intel_quark_acpi_match_i2c = { .adr = MFD_ACPI_MATCH_I2C, }; -static const struct resource intel_quark_gpio_res[] = { +/* This is used as a place holder and will be modified at run-time */ +static struct resource intel_quark_gpio_res[] = { [INTEL_QUARK_IORES_MEM] = { .flags = IORESOURCE_MEM, }, diff --git a/drivers/mfd/wm831x-auxadc.c b/drivers/mfd/wm831x-auxadc.c index 8a7cc0f86958..65b98f3fbd92 100644 --- a/drivers/mfd/wm831x-auxadc.c +++ b/drivers/mfd/wm831x-auxadc.c @@ -93,11 +93,10 @@ static int wm831x_auxadc_read_irq(struct wm831x *wm831x, wait_for_completion_timeout(&req->done, msecs_to_jiffies(500)); mutex_lock(&wm831x->auxadc_lock); - - list_del(&req->list); ret = req->val; out: + list_del(&req->list); mutex_unlock(&wm831x->auxadc_lock); kfree(req); diff --git a/drivers/misc/cardreader/rts5227.c b/drivers/misc/cardreader/rts5227.c index 8859011672cb..8200af22b529 100644 --- a/drivers/misc/cardreader/rts5227.c +++ b/drivers/misc/cardreader/rts5227.c @@ -398,6 +398,11 @@ static int rts522a_extra_init_hw(struct rtsx_pcr *pcr) { rts5227_extra_init_hw(pcr); + /* Power down OCP for power consumption */ + if (!pcr->card_exist) + rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN, + OC_POWER_DOWN); + rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG, FUNC_FORCE_UPME_XMT_DBG); rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04); diff --git a/drivers/misc/eeprom/eeprom_93xx46.c b/drivers/misc/eeprom/eeprom_93xx46.c index 7c45f82b4302..6e5f544c9c73 100644 --- a/drivers/misc/eeprom/eeprom_93xx46.c +++ b/drivers/misc/eeprom/eeprom_93xx46.c @@ -35,6 +35,10 @@ static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = { EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH, }; +static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = { + .quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE, +}; + struct eeprom_93xx46_dev { struct spi_device *spi; struct eeprom_93xx46_platform_data *pdata; @@ -55,6 +59,11 @@ static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev) return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH; } +static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev) +{ + return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE; +} + static int eeprom_93xx46_read(void *priv, unsigned int off, void *val, size_t count) { @@ -96,6 +105,11 @@ static int eeprom_93xx46_read(void *priv, unsigned int off, dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n", cmd_addr, edev->spi->max_speed_hz); + if (has_quirk_extra_read_cycle(edev)) { + cmd_addr <<= 1; + bits += 1; + } + spi_message_init(&m); t[0].tx_buf = (char *)&cmd_addr; @@ -363,6 +377,7 @@ static void select_deassert(void *context) static const struct of_device_id eeprom_93xx46_of_table[] = { { .compatible = "eeprom-93xx46", }, { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, }, + { .compatible = "microchip,93lc46b", .data = µchip_93lc46b_data, }, {} }; MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table); @@ -512,3 +527,4 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs"); MODULE_AUTHOR("Anatolij Gustschin "); MODULE_ALIAS("spi:93xx46"); +MODULE_ALIAS("spi:eeprom-93xx46"); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 70eb5ed942d0..beda610e6b30 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -520,12 +520,13 @@ fastrpc_map_dma_buf(struct dma_buf_attachment *attachment, { struct fastrpc_dma_buf_attachment *a = attachment->priv; struct sg_table *table; + int ret; table = &a->sgt; - if (!dma_map_sgtable(attachment->dev, table, dir, 0)) - return ERR_PTR(-ENOMEM); - + ret = dma_map_sgtable(attachment->dev, table, dir, 0); + if (ret) + table = ERR_PTR(ret); return table; } @@ -949,6 +950,11 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl, u32 kernel, if (!fl->cctx->rpdev) return -EPIPE; + if (handle == FASTRPC_INIT_HANDLE && !kernel) { + dev_warn_ratelimited(fl->sctx->dev, "user app trying to send a kernel RPC message (%d)\n", handle); + return -EPERM; + } + ctx = fastrpc_context_alloc(fl, kernel, sc, args); if (IS_ERR(ctx)) return PTR_ERR(ctx); diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 69d04eca767f..82c0306a9210 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -93,12 +93,19 @@ void hl_hpriv_put(struct hl_fpriv *hpriv) static int hl_device_release(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; + + filp->private_data = NULL; + + if (!hdev) { + pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n"); + put_pid(hpriv->taskpid); + return 0; + } hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr); hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr); - filp->private_data = NULL; - hl_hpriv_put(hpriv); return 0; @@ -107,15 +114,20 @@ static int hl_device_release(struct inode *inode, struct file *filp) static int hl_device_release_ctrl(struct inode *inode, struct file *filp) { struct hl_fpriv *hpriv = filp->private_data; - struct hl_device *hdev; + struct hl_device *hdev = hpriv->hdev; filp->private_data = NULL; - hdev = hpriv->hdev; + if (!hdev) { + pr_err("Closing FD after device was removed\n"); + goto out; + } mutex_lock(&hdev->fpriv_list_lock); list_del(&hpriv->dev_node); mutex_unlock(&hdev->fpriv_list_lock); +out: + put_pid(hpriv->taskpid); kfree(hpriv); @@ -134,8 +146,14 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp) static int hl_mmap(struct file *filp, struct vm_area_struct *vma) { struct hl_fpriv *hpriv = filp->private_data; + struct hl_device *hdev = hpriv->hdev; unsigned long vm_pgoff; + if (!hdev) { + pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n"); + return -ENODEV; + } + vm_pgoff = vma->vm_pgoff; vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff); @@ -882,6 +900,16 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout) return -EBUSY; } +static void device_disable_open_processes(struct hl_device *hdev) +{ + struct hl_fpriv *hpriv; + + mutex_lock(&hdev->fpriv_list_lock); + list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node) + hpriv->hdev = NULL; + mutex_unlock(&hdev->fpriv_list_lock); +} + /* * hl_device_reset - reset the device * @@ -1536,8 +1564,10 @@ void hl_device_fini(struct hl_device *hdev) HL_PENDING_RESET_LONG_SEC); rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC); - if (rc) + if (rc) { dev_crit(hdev->dev, "Failed to kill all open processes\n"); + device_disable_open_processes(hdev); + } hl_cb_pool_fini(hdev); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index d25892d61ec9..0805e1173d54 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -5,6 +5,8 @@ * All Rights Reserved. */ +#define pr_fmt(fmt) "habanalabs: " fmt + #include #include "habanalabs.h" @@ -667,6 +669,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); + if (!hdev) { + pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); + return -ENODEV; + } + if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) { ioctl = &hl_ioctls[nr]; } else { @@ -685,6 +692,11 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg) const struct hl_ioctl_desc *ioctl = NULL; unsigned int nr = _IOC_NR(cmd); + if (!hdev) { + pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n"); + return -ENODEV; + } + if (nr == _IOC_NR(HL_IOCTL_INFO)) { ioctl = &hl_ioctls_control[nr]; } else { diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 2907db260fba..bf0407e8905c 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -60,6 +60,13 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag, goto out; } + if (vtag) { + /* Check if vtag is supported by client */ + rets = mei_cl_vt_support_check(cl); + if (rets) + goto out; + } + if (length > mei_cl_mtu(cl)) { rets = -EFBIG; goto out; diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c index 686e8b6a4c55..0cba3c6dfb14 100644 --- a/drivers/misc/mei/hbm.c +++ b/drivers/misc/mei/hbm.c @@ -1373,7 +1373,7 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr) return -EPROTO; } - dev->dev_state = MEI_DEV_POWER_DOWN; + mei_set_devstate(dev, MEI_DEV_POWER_DOWN); dev_info(dev->dev, "hbm: stop response: resetting.\n"); /* force the reset */ return -EPROTO; diff --git a/drivers/misc/mei/hw-me-regs.h b/drivers/misc/mei/hw-me-regs.h index 9cf8d8f60cfe..14be76d4c2e6 100644 --- a/drivers/misc/mei/hw-me-regs.h +++ b/drivers/misc/mei/hw-me-regs.h @@ -101,6 +101,11 @@ #define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */ #define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */ +#define MEI_DEV_ID_EBG 0x1BE0 /* Emmitsburg WS */ + +#define MEI_DEV_ID_ADP_S 0x7AE8 /* Alder Lake Point S */ +#define MEI_DEV_ID_ADP_LP 0x7A60 /* Alder Lake Point LP */ + /* * MEI HW Section */ diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c index 326955b04fda..2161c1234ad7 100644 --- a/drivers/misc/mei/interrupt.c +++ b/drivers/misc/mei/interrupt.c @@ -295,12 +295,17 @@ static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr) static inline int hdr_is_valid(u32 msg_hdr) { struct mei_msg_hdr *mei_hdr; + u32 expected_len = 0; mei_hdr = (struct mei_msg_hdr *)&msg_hdr; if (!msg_hdr || mei_hdr->reserved) return -EBADMSG; - if (mei_hdr->dma_ring && mei_hdr->length != MEI_SLOT_SIZE) + if (mei_hdr->dma_ring) + expected_len += MEI_SLOT_SIZE; + if (mei_hdr->extended) + expected_len += MEI_SLOT_SIZE; + if (mei_hdr->length < expected_len) return -EBADMSG; return 0; @@ -324,6 +329,8 @@ int mei_irq_read_handler(struct mei_device *dev, struct mei_cl *cl; int ret; u32 ext_meta_hdr_u32; + u32 hdr_size_left; + u32 hdr_size_ext; int i; int ext_hdr_end; @@ -353,6 +360,7 @@ int mei_irq_read_handler(struct mei_device *dev, } ext_hdr_end = 1; + hdr_size_left = mei_hdr->length; if (mei_hdr->extended) { if (!dev->rd_msg_hdr[1]) { @@ -363,8 +371,21 @@ int mei_irq_read_handler(struct mei_device *dev, dev_dbg(dev->dev, "extended header is %08x\n", ext_meta_hdr_u32); } - meta_hdr = ((struct mei_ext_meta_hdr *) - dev->rd_msg_hdr + 1); + meta_hdr = ((struct mei_ext_meta_hdr *)dev->rd_msg_hdr + 1); + if (check_add_overflow((u32)sizeof(*meta_hdr), + mei_slots2data(meta_hdr->size), + &hdr_size_ext)) { + dev_err(dev->dev, "extended message size too big %d\n", + meta_hdr->size); + return -EBADMSG; + } + if (hdr_size_left < hdr_size_ext) { + dev_err(dev->dev, "corrupted message header len %d\n", + mei_hdr->length); + return -EBADMSG; + } + hdr_size_left -= hdr_size_ext; + ext_hdr_end = meta_hdr->size + 2; for (i = dev->rd_msg_hdr_count; i < ext_hdr_end; i++) { dev->rd_msg_hdr[i] = mei_read_hdr(dev); @@ -376,6 +397,12 @@ int mei_irq_read_handler(struct mei_device *dev, } if (mei_hdr->dma_ring) { + if (hdr_size_left != sizeof(dev->rd_msg_hdr[ext_hdr_end])) { + dev_err(dev->dev, "corrupted message header len %d\n", + mei_hdr->length); + return -EBADMSG; + } + dev->rd_msg_hdr[ext_hdr_end] = mei_read_hdr(dev); dev->rd_msg_hdr_count++; (*slots)--; diff --git a/drivers/misc/mei/pci-me.c b/drivers/misc/mei/pci-me.c index 1de9ef7a272b..a7e179626b63 100644 --- a/drivers/misc/mei/pci-me.c +++ b/drivers/misc/mei/pci-me.c @@ -107,6 +107,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = { {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_EBG, MEI_ME_PCH15_SPS_CFG)}, + + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)}, + {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)}, + /* required last entry */ {0, } }; diff --git a/drivers/misc/pvpanic.c b/drivers/misc/pvpanic.c index 41cab297d66e..2356d621967e 100644 --- a/drivers/misc/pvpanic.c +++ b/drivers/misc/pvpanic.c @@ -92,6 +92,7 @@ static const struct of_device_id pvpanic_mmio_match[] = { { .compatible = "qemu,pvpanic-mmio", }, {} }; +MODULE_DEVICE_TABLE(of, pvpanic_mmio_match); static const struct acpi_device_id pvpanic_device_ids[] = { { "QEMU0001", 0 }, diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index c49065887e8f..c2338750313c 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -537,6 +537,9 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size) queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page); + if (queue_size + queue_page_size > KMALLOC_MAX_SIZE) + return NULL; + queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL); if (queue) { queue->q_header = NULL; @@ -630,7 +633,7 @@ static void qp_release_pages(struct page **pages, for (i = 0; i < num_pages; i++) { if (dirty) - set_page_dirty(pages[i]); + set_page_dirty_lock(pages[i]); put_page(pages[i]); pages[i] = NULL; diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c index c2e70b757dd1..4383c262b3f5 100644 --- a/drivers/mmc/core/bus.c +++ b/drivers/mmc/core/bus.c @@ -399,11 +399,6 @@ void mmc_remove_card(struct mmc_card *card) mmc_remove_card_debugfs(card); #endif - if (host->cqe_enabled) { - host->cqe_ops->cqe_disable(host); - host->cqe_enabled = false; - } - if (mmc_card_present(card)) { if (mmc_host_is_spi(card->host)) { pr_info("%s: SPI card removed\n", @@ -416,6 +411,10 @@ void mmc_remove_card(struct mmc_card *card) of_node_put(card->dev.of_node); } + if (host->cqe_enabled) { + host->cqe_ops->cqe_disable(host); + host->cqe_enabled = false; + } + put_device(&card->dev); } - diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index ff3063ce2acd..9ce34e880033 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -423,10 +423,6 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) /* EXT_CSD value is in units of 10ms, but we store in ms */ card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME]; - /* Some eMMC set the value too low so set a minimum */ - if (card->ext_csd.part_time && - card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME) - card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME; /* Sleep / awake timeout in 100ns units */ if (sa_shift > 0 && sa_shift <= 0x17) @@ -616,6 +612,17 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd) card->ext_csd.data_sector_size = 512; } + /* + * GENERIC_CMD6_TIME is to be used "unless a specific timeout is defined + * when accessing a specific field", so use it here if there is no + * PARTITION_SWITCH_TIME. + */ + if (!card->ext_csd.part_time) + card->ext_csd.part_time = card->ext_csd.generic_cmd6_time; + /* Some eMMC set the value too low so set a minimum */ + if (card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME) + card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME; + /* eMMC v5 or later */ if (card->ext_csd.rev >= 7) { memcpy(card->ext_csd.fwrev, &ext_csd[EXT_CSD_FIRMWARE_VERSION], diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c index b5a41a7ce165..9bde0def114b 100644 --- a/drivers/mmc/host/mmci.c +++ b/drivers/mmc/host/mmci.c @@ -1241,7 +1241,11 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c) if (!cmd->busy_timeout) cmd->busy_timeout = 10 * MSEC_PER_SEC; - clks = (unsigned long long)cmd->busy_timeout * host->cclk; + if (cmd->busy_timeout > host->mmc->max_busy_timeout) + clks = (unsigned long long)host->mmc->max_busy_timeout * host->cclk; + else + clks = (unsigned long long)cmd->busy_timeout * host->cclk; + do_div(clks, MSEC_PER_SEC); writel_relaxed(clks, host->base + MMCIDATATIMER); } @@ -2091,6 +2095,10 @@ static int mmci_probe(struct amba_device *dev, mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY; } + /* Variants with mandatory busy timeout in HW needs R1B responses. */ + if (variant->busy_timeout) + mmc->caps |= MMC_CAP_NEED_RSP_BUSY; + /* Prepare a CMD12 - needed to clear the DPSM on some variants. */ host->stop_abort.opcode = MMC_STOP_TRANSMISSION; host->stop_abort.arg = 0; diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index de09c6347524..898ed1b023df 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1127,13 +1127,13 @@ static void msdc_track_cmd_data(struct msdc_host *host, static void msdc_request_done(struct msdc_host *host, struct mmc_request *mrq) { unsigned long flags; - bool ret; - ret = cancel_delayed_work(&host->req_timeout); - if (!ret) { - /* delay work already running */ - return; - } + /* + * No need check the return value of cancel_delayed_work, as only ONE + * path will go here! + */ + cancel_delayed_work(&host->req_timeout); + spin_lock_irqsave(&host->lock, flags); host->mrq = NULL; spin_unlock_irqrestore(&host->lock, flags); @@ -1155,7 +1155,7 @@ static bool msdc_cmd_done(struct msdc_host *host, int events, bool done = false; bool sbc_error; unsigned long flags; - u32 *rsp = cmd->resp; + u32 *rsp; if (mrq->sbc && cmd == mrq->cmd && (events & (MSDC_INT_ACMDRDY | MSDC_INT_ACMDCRCERR @@ -1176,6 +1176,7 @@ static bool msdc_cmd_done(struct msdc_host *host, int events, if (done) return true; + rsp = cmd->resp; sdr_clr_bits(host->base + MSDC_INTEN, cmd_ints_mask); @@ -1363,7 +1364,7 @@ static void msdc_data_xfer_next(struct msdc_host *host, static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, struct mmc_request *mrq, struct mmc_data *data) { - struct mmc_command *stop = data->stop; + struct mmc_command *stop; unsigned long flags; bool done; unsigned int check_data = events & @@ -1379,6 +1380,7 @@ static bool msdc_data_xfer_done(struct msdc_host *host, u32 events, if (done) return true; + stop = data->stop; if (check_data || (stop && stop->error)) { dev_dbg(host->dev, "DMA status: 0x%8X\n", diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c index 56bbc6cd9c84..947581de7860 100644 --- a/drivers/mmc/host/mxs-mmc.c +++ b/drivers/mmc/host/mxs-mmc.c @@ -628,7 +628,7 @@ static int mxs_mmc_probe(struct platform_device *pdev) ret = mmc_of_parse(mmc); if (ret) - goto out_clk_disable; + goto out_free_dma; mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; diff --git a/drivers/mmc/host/owl-mmc.c b/drivers/mmc/host/owl-mmc.c index 53b81582f1af..5490962dc8e5 100644 --- a/drivers/mmc/host/owl-mmc.c +++ b/drivers/mmc/host/owl-mmc.c @@ -640,7 +640,7 @@ static int owl_mmc_probe(struct platform_device *pdev) owl_host->irq = platform_get_irq(pdev, 0); if (owl_host->irq < 0) { ret = -EINVAL; - goto err_free_host; + goto err_release_channel; } ret = devm_request_irq(&pdev->dev, owl_host->irq, owl_irq_handler, @@ -648,19 +648,21 @@ static int owl_mmc_probe(struct platform_device *pdev) if (ret) { dev_err(&pdev->dev, "Failed to request irq %d\n", owl_host->irq); - goto err_free_host; + goto err_release_channel; } ret = mmc_add_host(mmc); if (ret) { dev_err(&pdev->dev, "Failed to add host\n"); - goto err_free_host; + goto err_release_channel; } dev_dbg(&pdev->dev, "Owl MMC Controller Initialized\n"); return 0; +err_release_channel: + dma_release_channel(owl_host->dma); err_free_host: mmc_free_host(mmc); @@ -674,6 +676,7 @@ static int owl_mmc_remove(struct platform_device *pdev) mmc_remove_host(mmc); disable_irq(owl_host->irq); + dma_release_channel(owl_host->dma); mmc_free_host(mmc); return 0; diff --git a/drivers/mmc/host/renesas_sdhi_internal_dmac.c b/drivers/mmc/host/renesas_sdhi_internal_dmac.c index fe13e1ea22dc..f3e76d6b3e3f 100644 --- a/drivers/mmc/host/renesas_sdhi_internal_dmac.c +++ b/drivers/mmc/host/renesas_sdhi_internal_dmac.c @@ -186,8 +186,8 @@ renesas_sdhi_internal_dmac_start_dma(struct tmio_mmc_host *host, mmc_get_dma_dir(data))) goto force_pio; - /* This DMAC cannot handle if buffer is not 8-bytes alignment */ - if (!IS_ALIGNED(sg_dma_address(sg), 8)) + /* This DMAC cannot handle if buffer is not 128-bytes alignment */ + if (!IS_ALIGNED(sg_dma_address(sg), 128)) goto force_pio_with_unmap; if (data->flags & MMC_DATA_READ) { diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c index 16ed19f47939..a20459744d21 100644 --- a/drivers/mmc/host/sdhci-esdhc-imx.c +++ b/drivers/mmc/host/sdhci-esdhc-imx.c @@ -1666,9 +1666,10 @@ static int sdhci_esdhc_imx_remove(struct platform_device *pdev) struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host); struct pltfm_imx_data *imx_data = sdhci_pltfm_priv(pltfm_host); - int dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); + int dead; pm_runtime_get_sync(&pdev->dev); + dead = (readl(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff); pm_runtime_disable(&pdev->dev); pm_runtime_put_noidle(&pdev->dev); diff --git a/drivers/mmc/host/sdhci-iproc.c b/drivers/mmc/host/sdhci-iproc.c index c9434b461aab..ddeaf8e1f72f 100644 --- a/drivers/mmc/host/sdhci-iproc.c +++ b/drivers/mmc/host/sdhci-iproc.c @@ -296,9 +296,27 @@ static const struct of_device_id sdhci_iproc_of_match[] = { MODULE_DEVICE_TABLE(of, sdhci_iproc_of_match); #ifdef CONFIG_ACPI +/* + * This is a duplicate of bcm2835_(pltfrm_)data without caps quirks + * which are provided by the ACPI table. + */ +static const struct sdhci_pltfm_data sdhci_bcm_arasan_data = { + .quirks = SDHCI_QUIRK_BROKEN_CARD_DETECTION | + SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | + SDHCI_QUIRK_NO_HISPD_BIT, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, + .ops = &sdhci_iproc_32only_ops, +}; + +static const struct sdhci_iproc_data bcm_arasan_data = { + .pdata = &sdhci_bcm_arasan_data, +}; + static const struct acpi_device_id sdhci_iproc_acpi_ids[] = { { .id = "BRCM5871", .driver_data = (kernel_ulong_t)&iproc_cygnus_data }, { .id = "BRCM5872", .driver_data = (kernel_ulong_t)&iproc_data }, + { .id = "BCM2847", .driver_data = (kernel_ulong_t)&bcm_arasan_data }, + { .id = "BRCME88C", .driver_data = (kernel_ulong_t)&bcm2711_data }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(acpi, sdhci_iproc_acpi_ids); diff --git a/drivers/mmc/host/sdhci-of-dwcmshc.c b/drivers/mmc/host/sdhci-of-dwcmshc.c index d90020ed3622..59d8d96ce206 100644 --- a/drivers/mmc/host/sdhci-of-dwcmshc.c +++ b/drivers/mmc/host/sdhci-of-dwcmshc.c @@ -112,6 +112,7 @@ static const struct sdhci_ops sdhci_dwcmshc_ops = { static const struct sdhci_pltfm_data sdhci_dwcmshc_pdata = { .ops = &sdhci_dwcmshc_ops, .quirks = SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN, + .quirks2 = SDHCI_QUIRK2_PRESET_VALUE_BROKEN, }; static int dwcmshc_probe(struct platform_device *pdev) diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c index fa76748d8929..94e3f72f6405 100644 --- a/drivers/mmc/host/sdhci-pci-o2micro.c +++ b/drivers/mmc/host/sdhci-pci-o2micro.c @@ -33,6 +33,8 @@ #define O2_SD_ADMA2 0xE7 #define O2_SD_INF_MOD 0xF1 #define O2_SD_MISC_CTRL4 0xFC +#define O2_SD_MISC_CTRL 0x1C0 +#define O2_SD_PWR_FORCE_L0 0x0002 #define O2_SD_TUNING_CTRL 0x300 #define O2_SD_PLL_SETTING 0x304 #define O2_SD_MISC_SETTING 0x308 @@ -300,6 +302,8 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode) { struct sdhci_host *host = mmc_priv(mmc); int current_bus_width = 0; + u32 scratch32 = 0; + u16 scratch = 0; /* * This handler only implements the eMMC tuning that is specific to @@ -312,6 +316,17 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode) if (WARN_ON((opcode != MMC_SEND_TUNING_BLOCK_HS200) && (opcode != MMC_SEND_TUNING_BLOCK))) return -EINVAL; + + /* Force power mode enter L0 */ + scratch = sdhci_readw(host, O2_SD_MISC_CTRL); + scratch |= O2_SD_PWR_FORCE_L0; + sdhci_writew(host, scratch, O2_SD_MISC_CTRL); + + /* wait DLL lock, timeout value 5ms */ + if (readx_poll_timeout(sdhci_o2_pll_dll_wdt_control, host, + scratch32, (scratch32 & O2_DLL_LOCK_STATUS), 1, 5000)) + pr_warn("%s: DLL can't lock in 5ms after force L0 during tuning.\n", + mmc_hostname(host->mmc)); /* * Judge the tuning reason, whether caused by dll shift * If cause by dll shift, should call sdhci_o2_dll_recovery @@ -344,6 +359,11 @@ static int sdhci_o2_execute_tuning(struct mmc_host *mmc, u32 opcode) sdhci_set_bus_width(host, current_bus_width); } + /* Cancel force power mode enter L0 */ + scratch = sdhci_readw(host, O2_SD_MISC_CTRL); + scratch &= ~(O2_SD_PWR_FORCE_L0); + sdhci_writew(host, scratch, O2_SD_MISC_CTRL); + sdhci_reset(host, SDHCI_RESET_CMD); sdhci_reset(host, SDHCI_RESET_DATA); diff --git a/drivers/mmc/host/sdhci-sprd.c b/drivers/mmc/host/sdhci-sprd.c index f85171edabeb..5dc36efff47f 100644 --- a/drivers/mmc/host/sdhci-sprd.c +++ b/drivers/mmc/host/sdhci-sprd.c @@ -708,14 +708,14 @@ static int sdhci_sprd_remove(struct platform_device *pdev) { struct sdhci_host *host = platform_get_drvdata(pdev); struct sdhci_sprd_host *sprd_host = TO_SPRD_HOST(host); - struct mmc_host *mmc = host->mmc; - mmc_remove_host(mmc); + sdhci_remove_host(host, 0); + clk_disable_unprepare(sprd_host->clk_sdio); clk_disable_unprepare(sprd_host->clk_enable); clk_disable_unprepare(sprd_host->clk_2x_enable); - mmc_free_host(mmc); + sdhci_pltfm_free(pdev); return 0; } diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c index e2d5112d809d..615f3d008af1 100644 --- a/drivers/mmc/host/usdhi6rol0.c +++ b/drivers/mmc/host/usdhi6rol0.c @@ -1858,10 +1858,12 @@ static int usdhi6_probe(struct platform_device *pdev) ret = mmc_add_host(mmc); if (ret < 0) - goto e_clk_off; + goto e_release_dma; return 0; +e_release_dma: + usdhi6_dma_release(host); e_clk_off: clk_disable_unprepare(host->clk); e_free_mmc: diff --git a/drivers/mtd/devices/phram.c b/drivers/mtd/devices/phram.c index cfd170946ba4..5b04ae6c3057 100644 --- a/drivers/mtd/devices/phram.c +++ b/drivers/mtd/devices/phram.c @@ -222,6 +222,7 @@ static int phram_setup(const char *val) uint64_t start; uint64_t len; uint64_t erasesize = PAGE_SIZE; + uint32_t rem; int i, ret; if (strnlen(val, sizeof(buf)) >= sizeof(buf)) @@ -263,8 +264,11 @@ static int phram_setup(const char *val) } } + if (erasesize) + div_u64_rem(len, (uint32_t)erasesize, &rem); + if (len == 0 || erasesize == 0 || erasesize > len - || erasesize > UINT_MAX || do_div(len, (uint32_t)erasesize) != 0) { + || erasesize > UINT_MAX || rem) { parse_err("illegal erasesize or len\n"); goto error; } diff --git a/drivers/mtd/nand/raw/intel-nand-controller.c b/drivers/mtd/nand/raw/intel-nand-controller.c index a304fda5d1fa..8b49fd56cf96 100644 --- a/drivers/mtd/nand/raw/intel-nand-controller.c +++ b/drivers/mtd/nand/raw/intel-nand-controller.c @@ -318,8 +318,10 @@ static int ebu_dma_start(struct ebu_nand_controller *ebu_host, u32 dir, } tx = dmaengine_prep_slave_single(chan, buf_dma, len, dir, flags); - if (!tx) - return -ENXIO; + if (!tx) { + ret = -ENXIO; + goto err_unmap; + } tx->callback = callback; tx->callback_param = ebu_host; diff --git a/drivers/mtd/parsers/afs.c b/drivers/mtd/parsers/afs.c index 980e332bdac4..26116694c821 100644 --- a/drivers/mtd/parsers/afs.c +++ b/drivers/mtd/parsers/afs.c @@ -370,10 +370,8 @@ static int parse_afs_partitions(struct mtd_info *mtd, return i; out_free_parts: - while (i >= 0) { + while (--i >= 0) kfree(parts[i].name); - i--; - } kfree(parts); *pparts = NULL; return ret; diff --git a/drivers/mtd/parsers/parser_imagetag.c b/drivers/mtd/parsers/parser_imagetag.c index d69607b48227..fab0949aabba 100644 --- a/drivers/mtd/parsers/parser_imagetag.c +++ b/drivers/mtd/parsers/parser_imagetag.c @@ -83,6 +83,7 @@ static int bcm963xx_parse_imagetag_partitions(struct mtd_info *master, pr_err("invalid rootfs address: %*ph\n", (int)sizeof(buf->flash_image_start), buf->flash_image_start); + ret = -EINVAL; goto out; } @@ -92,6 +93,7 @@ static int bcm963xx_parse_imagetag_partitions(struct mtd_info *master, pr_err("invalid kernel address: %*ph\n", (int)sizeof(buf->kernel_address), buf->kernel_address); + ret = -EINVAL; goto out; } @@ -100,6 +102,7 @@ static int bcm963xx_parse_imagetag_partitions(struct mtd_info *master, pr_err("invalid kernel length: %*ph\n", (int)sizeof(buf->kernel_length), buf->kernel_length); + ret = -EINVAL; goto out; } @@ -108,6 +111,7 @@ static int bcm963xx_parse_imagetag_partitions(struct mtd_info *master, pr_err("invalid total length: %*ph\n", (int)sizeof(buf->total_length), buf->total_length); + ret = -EINVAL; goto out; } diff --git a/drivers/mtd/spi-nor/controllers/hisi-sfc.c b/drivers/mtd/spi-nor/controllers/hisi-sfc.c index 7c26f8f565cb..47fbf1d1e557 100644 --- a/drivers/mtd/spi-nor/controllers/hisi-sfc.c +++ b/drivers/mtd/spi-nor/controllers/hisi-sfc.c @@ -399,8 +399,10 @@ static int hisi_spi_nor_register_all(struct hifmc_host *host) for_each_available_child_of_node(dev->of_node, np) { ret = hisi_spi_nor_register(np, host); - if (ret) + if (ret) { + of_node_put(np); goto fail; + } if (host->num_chip == HIFMC_MAX_CHIP_NUM) { dev_warn(dev, "Flash device number exceeds the maximum chipselect number\n"); diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 20df44b753da..b17faccc95c4 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -1364,14 +1364,15 @@ spi_nor_find_best_erase_type(const struct spi_nor_erase_map *map, erase = &map->erase_type[i]; + /* Alignment is not mandatory for overlaid regions */ + if (region->offset & SNOR_OVERLAID_REGION && + region->size <= len) + return erase; + /* Don't erase more than what the user has asked for. */ if (erase->size > len) continue; - /* Alignment is not mandatory for overlaid regions */ - if (region->offset & SNOR_OVERLAID_REGION) - return erase; - spi_nor_div_by_erase_size(erase, addr, &rem); if (rem) continue; @@ -1515,6 +1516,7 @@ static int spi_nor_init_erase_cmd_list(struct spi_nor *nor, goto destroy_erase_cmd_list; if (prev_erase != erase || + erase->size != cmd->size || region->offset & SNOR_OVERLAID_REGION) { cmd = spi_nor_init_erase_cmd(region, erase); if (IS_ERR(cmd)) { diff --git a/drivers/mtd/spi-nor/sfdp.c b/drivers/mtd/spi-nor/sfdp.c index 6ee7719e5903..25142ec4737b 100644 --- a/drivers/mtd/spi-nor/sfdp.c +++ b/drivers/mtd/spi-nor/sfdp.c @@ -788,7 +788,7 @@ spi_nor_region_check_overlay(struct spi_nor_erase_region *region, int i; for (i = 0; i < SNOR_ERASE_TYPE_MAX; i++) { - if (!(erase_type & BIT(i))) + if (!(erase[i].size && erase_type & BIT(erase[i].idx))) continue; if (region->size & erase[i].size_mask) { spi_nor_region_mark_overlay(region); @@ -858,6 +858,7 @@ spi_nor_init_non_uniform_erase_map(struct spi_nor *nor, offset = (region[i].offset & ~SNOR_ERASE_FLAGS_MASK) + region[i].size; } + spi_nor_region_mark_end(®ion[i - 1]); save_uniform_erase_type = map->uniform_erase_type; map->uniform_erase_type = spi_nor_sort_erase_mask(map, @@ -881,8 +882,6 @@ spi_nor_init_non_uniform_erase_map(struct spi_nor *nor, if (!(regions_erase_type & BIT(erase[i].idx))) spi_nor_set_erase_type(&erase[i], 0, 0xFF); - spi_nor_region_mark_end(®ion[i - 1]); - return 0; } diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 260f9f46668b..6d9e90887b29 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -87,12 +87,12 @@ config WIREGUARD select CRYPTO_CURVE25519_X86 if X86 && 64BIT select ARM_CRYPTO if ARM select ARM64_CRYPTO if ARM64 - select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON + select CRYPTO_CHACHA20_NEON if ARM || (ARM64 && KERNEL_MODE_NEON) select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON select CRYPTO_POLY1305_ARM if ARM select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2 - select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT) + select CRYPTO_POLY1305_MIPS if MIPS help WireGuard is a secure, fast, and easy to use replacement for IPSec that uses modern cryptography and clever networking tricks. It's diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index 63f48b016ecd..716d1a5bf17b 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -212,18 +212,6 @@ static const struct can_bittiming_const c_can_bittiming_const = { .brp_inc = 1, }; -static inline void c_can_pm_runtime_enable(const struct c_can_priv *priv) -{ - if (priv->device) - pm_runtime_enable(priv->device); -} - -static inline void c_can_pm_runtime_disable(const struct c_can_priv *priv) -{ - if (priv->device) - pm_runtime_disable(priv->device); -} - static inline void c_can_pm_runtime_get_sync(const struct c_can_priv *priv) { if (priv->device) @@ -1335,7 +1323,6 @@ static const struct net_device_ops c_can_netdev_ops = { int register_c_can_dev(struct net_device *dev) { - struct c_can_priv *priv = netdev_priv(dev); int err; /* Deactivate pins to prevent DRA7 DCAN IP from being @@ -1345,28 +1332,19 @@ int register_c_can_dev(struct net_device *dev) */ pinctrl_pm_select_sleep_state(dev->dev.parent); - c_can_pm_runtime_enable(priv); - dev->flags |= IFF_ECHO; /* we support local echo */ dev->netdev_ops = &c_can_netdev_ops; err = register_candev(dev); - if (err) - c_can_pm_runtime_disable(priv); - else + if (!err) devm_can_led_init(dev); - return err; } EXPORT_SYMBOL_GPL(register_c_can_dev); void unregister_c_can_dev(struct net_device *dev) { - struct c_can_priv *priv = netdev_priv(dev); - unregister_candev(dev); - - c_can_pm_runtime_disable(priv); } EXPORT_SYMBOL_GPL(unregister_c_can_dev); diff --git a/drivers/net/can/c_can/c_can_pci.c b/drivers/net/can/c_can/c_can_pci.c index 406b4847e5dc..7efb60b50876 100644 --- a/drivers/net/can/c_can/c_can_pci.c +++ b/drivers/net/can/c_can/c_can_pci.c @@ -239,12 +239,13 @@ static void c_can_pci_remove(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); struct c_can_priv *priv = netdev_priv(dev); + void __iomem *addr = priv->base; unregister_c_can_dev(dev); free_c_can_dev(dev); - pci_iounmap(pdev, priv->base); + pci_iounmap(pdev, addr); pci_disable_msi(pdev); pci_clear_master(pdev); pci_release_regions(pdev); diff --git a/drivers/net/can/c_can/c_can_platform.c b/drivers/net/can/c_can/c_can_platform.c index 05f425ceb53a..47b251b1607c 100644 --- a/drivers/net/can/c_can/c_can_platform.c +++ b/drivers/net/can/c_can/c_can_platform.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -386,6 +387,7 @@ static int c_can_plat_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dev); SET_NETDEV_DEV(dev, &pdev->dev); + pm_runtime_enable(priv->device); ret = register_c_can_dev(dev); if (ret) { dev_err(&pdev->dev, "registering %s failed (err=%d)\n", @@ -398,6 +400,7 @@ static int c_can_plat_probe(struct platform_device *pdev) return 0; exit_free_device: + pm_runtime_disable(priv->device); free_c_can_dev(dev); exit: dev_err(&pdev->dev, "probe failed\n"); @@ -408,9 +411,10 @@ static int c_can_plat_probe(struct platform_device *pdev) static int c_can_plat_remove(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); + struct c_can_priv *priv = netdev_priv(dev); unregister_c_can_dev(dev); - + pm_runtime_disable(priv->device); free_c_can_dev(dev); return 0; diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c73e2a65c904..2a4f12c3c28b 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1255,6 +1255,7 @@ static void can_dellink(struct net_device *dev, struct list_head *head) static struct rtnl_link_ops can_link_ops __read_mostly = { .kind = "can", + .netns_refund = true, .maxtype = IFLA_CAN_MAX, .policy = can_policy, .setup = can_setup, diff --git a/drivers/net/can/flexcan.c b/drivers/net/can/flexcan.c index 038fe1036df2..a9502fbc6dd6 100644 --- a/drivers/net/can/flexcan.c +++ b/drivers/net/can/flexcan.c @@ -9,6 +9,7 @@ // // Based on code originally by Andrey Volkov +#include #include #include #include @@ -17,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -242,6 +244,8 @@ #define FLEXCAN_QUIRK_SUPPORT_FD BIT(9) /* support memory detection and correction */ #define FLEXCAN_QUIRK_SUPPORT_ECC BIT(10) +/* Setup stop mode with SCU firmware to support wakeup */ +#define FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW BIT(11) /* Structure of the message buffer */ struct flexcan_mb { @@ -347,6 +351,7 @@ struct flexcan_priv { u8 mb_count; u8 mb_size; u8 clk_src; /* clock source of CAN Protocol Engine */ + u8 scu_idx; u64 rx_mask; u64 tx_mask; @@ -358,6 +363,9 @@ struct flexcan_priv { struct regulator *reg_xceiver; struct flexcan_stop_mode stm; + /* IPC handle when setup stop mode by System Controller firmware(scfw) */ + struct imx_sc_ipc *sc_ipc_handle; + /* Read and Write APIs */ u32 (*read)(void __iomem *addr); void (*write)(u32 val, void __iomem *addr); @@ -387,7 +395,7 @@ static const struct flexcan_devtype_data fsl_imx6q_devtype_data = { static const struct flexcan_devtype_data fsl_imx8qm_devtype_data = { .quirks = FLEXCAN_QUIRK_DISABLE_RXFG | FLEXCAN_QUIRK_ENABLE_EACEN_RRS | FLEXCAN_QUIRK_USE_OFF_TIMESTAMP | FLEXCAN_QUIRK_BROKEN_PERR_STATE | - FLEXCAN_QUIRK_SUPPORT_FD, + FLEXCAN_QUIRK_SUPPORT_FD | FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW, }; static struct flexcan_devtype_data fsl_imx8mp_devtype_data = { @@ -546,18 +554,42 @@ static void flexcan_enable_wakeup_irq(struct flexcan_priv *priv, bool enable) priv->write(reg_mcr, ®s->mcr); } +static int flexcan_stop_mode_enable_scfw(struct flexcan_priv *priv, bool enabled) +{ + u8 idx = priv->scu_idx; + u32 rsrc_id, val; + + rsrc_id = IMX_SC_R_CAN(idx); + + if (enabled) + val = 1; + else + val = 0; + + /* stop mode request via scu firmware */ + return imx_sc_misc_set_control(priv->sc_ipc_handle, rsrc_id, + IMX_SC_C_IPG_STOP, val); +} + static inline int flexcan_enter_stop_mode(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->regs; u32 reg_mcr; + int ret; reg_mcr = priv->read(®s->mcr); reg_mcr |= FLEXCAN_MCR_SLF_WAK; priv->write(reg_mcr, ®s->mcr); /* enable stop request */ - regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, - 1 << priv->stm.req_bit, 1 << priv->stm.req_bit); + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) { + ret = flexcan_stop_mode_enable_scfw(priv, true); + if (ret < 0) + return ret; + } else { + regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, + 1 << priv->stm.req_bit, 1 << priv->stm.req_bit); + } return flexcan_low_power_enter_ack(priv); } @@ -566,10 +598,17 @@ static inline int flexcan_exit_stop_mode(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->regs; u32 reg_mcr; + int ret; /* remove stop request */ - regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, - 1 << priv->stm.req_bit, 0); + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) { + ret = flexcan_stop_mode_enable_scfw(priv, false); + if (ret < 0) + return ret; + } else { + regmap_update_bits(priv->stm.gpr, priv->stm.req_gpr, + 1 << priv->stm.req_bit, 0); + } reg_mcr = priv->read(®s->mcr); reg_mcr &= ~FLEXCAN_MCR_SLF_WAK; @@ -658,11 +697,17 @@ static int flexcan_chip_disable(struct flexcan_priv *priv) static int flexcan_chip_freeze(struct flexcan_priv *priv) { struct flexcan_regs __iomem *regs = priv->regs; - unsigned int timeout = 1000 * 1000 * 10 / priv->can.bittiming.bitrate; + unsigned int timeout; + u32 bitrate = priv->can.bittiming.bitrate; u32 reg; + if (bitrate) + timeout = 1000 * 1000 * 10 / bitrate; + else + timeout = FLEXCAN_TIMEOUT_US / 10; + reg = priv->read(®s->mcr); - reg |= FLEXCAN_MCR_HALT; + reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT; priv->write(reg, ®s->mcr); while (timeout-- && !(priv->read(®s->mcr) & FLEXCAN_MCR_FRZ_ACK)) @@ -1440,10 +1485,13 @@ static int flexcan_chip_start(struct net_device *dev) flexcan_set_bittiming(dev); + /* set freeze, halt */ + err = flexcan_chip_freeze(priv); + if (err) + goto out_chip_disable; + /* MCR * - * enable freeze - * halt now * only supervisor access * enable warning int * enable individual RX masking @@ -1452,9 +1500,8 @@ static int flexcan_chip_start(struct net_device *dev) */ reg_mcr = priv->read(®s->mcr); reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff); - reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV | - FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | FLEXCAN_MCR_IDAM_C | - FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); + reg_mcr |= FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | + FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx); /* MCR * @@ -1825,10 +1872,14 @@ static int register_flexcandev(struct net_device *dev) if (err) goto out_chip_disable; - /* set freeze, halt and activate FIFO, restrict register access */ + /* set freeze, halt */ + err = flexcan_chip_freeze(priv); + if (err) + goto out_chip_disable; + + /* activate FIFO, restrict register access */ reg = priv->read(®s->mcr); - reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | - FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; + reg |= FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV; priv->write(reg, ®s->mcr); /* Currently we only support newer versions of this core @@ -1867,7 +1918,7 @@ static void unregister_flexcandev(struct net_device *dev) unregister_candev(dev); } -static int flexcan_setup_stop_mode(struct platform_device *pdev) +static int flexcan_setup_stop_mode_gpr(struct platform_device *pdev) { struct net_device *dev = platform_get_drvdata(pdev); struct device_node *np = pdev->dev.of_node; @@ -1912,11 +1963,6 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) "gpr %s req_gpr=0x02%x req_bit=%u\n", gpr_np->full_name, priv->stm.req_gpr, priv->stm.req_bit); - device_set_wakeup_capable(&pdev->dev, true); - - if (of_property_read_bool(np, "wakeup-source")) - device_set_wakeup_enable(&pdev->dev, true); - return 0; out_put_node: @@ -1924,6 +1970,58 @@ static int flexcan_setup_stop_mode(struct platform_device *pdev) return ret; } +static int flexcan_setup_stop_mode_scfw(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct flexcan_priv *priv; + u8 scu_idx; + int ret; + + ret = of_property_read_u8(pdev->dev.of_node, "fsl,scu-index", &scu_idx); + if (ret < 0) { + dev_dbg(&pdev->dev, "failed to get scu index\n"); + return ret; + } + + priv = netdev_priv(dev); + priv->scu_idx = scu_idx; + + /* this function could be defered probe, return -EPROBE_DEFER */ + return imx_scu_get_handle(&priv->sc_ipc_handle); +} + +/* flexcan_setup_stop_mode - Setup stop mode for wakeup + * + * Return: = 0 setup stop mode successfully or doesn't support this feature + * < 0 fail to setup stop mode (could be defered probe) + */ +static int flexcan_setup_stop_mode(struct platform_device *pdev) +{ + struct net_device *dev = platform_get_drvdata(pdev); + struct flexcan_priv *priv; + int ret; + + priv = netdev_priv(dev); + + if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_SCFW) + ret = flexcan_setup_stop_mode_scfw(pdev); + else if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) + ret = flexcan_setup_stop_mode_gpr(pdev); + else + /* return 0 directly if doesn't support stop mode feature */ + return 0; + + if (ret) + return ret; + + device_set_wakeup_capable(&pdev->dev, true); + + if (of_property_read_bool(pdev->dev.of_node, "wakeup-source")) + device_set_wakeup_enable(&pdev->dev, true); + + return 0; +} + static const struct of_device_id flexcan_of_match[] = { { .compatible = "fsl,imx8qm-flexcan", .data = &fsl_imx8qm_devtype_data, }, { .compatible = "fsl,imx8mp-flexcan", .data = &fsl_imx8mp_devtype_data, }, @@ -2054,17 +2152,20 @@ static int flexcan_probe(struct platform_device *pdev) goto failed_register; } + err = flexcan_setup_stop_mode(pdev); + if (err < 0) { + if (err != -EPROBE_DEFER) + dev_err(&pdev->dev, "setup stop mode failed\n"); + goto failed_setup_stop_mode; + } + of_can_transceiver(dev); devm_can_led_init(dev); - if (priv->devtype_data->quirks & FLEXCAN_QUIRK_SETUP_STOP_MODE_GPR) { - err = flexcan_setup_stop_mode(pdev); - if (err) - dev_dbg(&pdev->dev, "failed to setup stop-mode\n"); - } - return 0; + failed_setup_stop_mode: + unregister_flexcandev(dev); failed_register: pm_runtime_put_noidle(&pdev->dev); pm_runtime_disable(&pdev->dev); diff --git a/drivers/net/can/kvaser_pciefd.c b/drivers/net/can/kvaser_pciefd.c index 969cedb9b0b6..0d77c60f775e 100644 --- a/drivers/net/can/kvaser_pciefd.c +++ b/drivers/net/can/kvaser_pciefd.c @@ -57,6 +57,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices"); #define KVASER_PCIEFD_KCAN_STAT_REG 0x418 #define KVASER_PCIEFD_KCAN_MODE_REG 0x41c #define KVASER_PCIEFD_KCAN_BTRN_REG 0x420 +#define KVASER_PCIEFD_KCAN_BUS_LOAD_REG 0x424 #define KVASER_PCIEFD_KCAN_BTRD_REG 0x428 #define KVASER_PCIEFD_KCAN_PWM_REG 0x430 /* Loopback control register */ @@ -949,6 +950,9 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie) timer_setup(&can->bec_poll_timer, kvaser_pciefd_bec_poll_timer, 0); + /* Disable Bus load reporting */ + iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_BUS_LOAD_REG); + tx_npackets = ioread32(can->reg_base + KVASER_PCIEFD_KCAN_TX_NPACKETS_REG); if (((tx_npackets >> KVASER_PCIEFD_KCAN_TX_NPACKETS_MAX_SHIFT) & diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index da551fd0f502..44b3f4b3aea5 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -501,9 +501,6 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota) } while ((rxfs & RXFS_FFL_MASK) && (quota > 0)) { - if (rxfs & RXFS_RFL) - netdev_warn(dev, "Rx FIFO 0 Message Lost\n"); - m_can_read_fifo(dev, rxfs); quota--; @@ -876,7 +873,7 @@ static int m_can_rx_peripheral(struct net_device *dev) { struct m_can_classdev *cdev = netdev_priv(dev); - m_can_rx_handler(dev, 1); + m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT); m_can_enable_all_interrupts(cdev); diff --git a/drivers/net/can/m_can/tcan4x5x.c b/drivers/net/can/m_can/tcan4x5x.c index 970f0e9d19bf..4920de09ffb7 100644 --- a/drivers/net/can/m_can/tcan4x5x.c +++ b/drivers/net/can/m_can/tcan4x5x.c @@ -326,14 +326,14 @@ static int tcan4x5x_init(struct m_can_classdev *cdev) if (ret) return ret; + /* Zero out the MCAN buffers */ + m_can_init_ram(cdev); + ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG, TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL); if (ret) return ret; - /* Zero out the MCAN buffers */ - m_can_init_ram(cdev); - return ret; } diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index f07e8b737d31..ee39e79927ef 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2901,7 +2901,7 @@ static int mcp251xfd_probe(struct spi_device *spi) spi_get_device_id(spi)->driver_data; /* Errata Reference: - * mcp2517fd: DS80000789B, mcp2518fd: DS80000792C 4. + * mcp2517fd: DS80000792C 5., mcp2518fd: DS80000789C 4. * * The SPI can write corrupted data to the RAM at fast SPI * speeds: diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index 95c7fa171e35..52100d4fe5a2 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -510,6 +510,19 @@ void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) } EXPORT_SYMBOL(b53_imp_vlan_setup); +static void b53_port_set_learning(struct b53_device *dev, int port, + bool learning) +{ + u16 reg; + + b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, ®); + if (learning) + reg &= ~BIT(port); + else + reg |= BIT(port); + b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg); +} + int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { struct b53_device *dev = ds->priv; @@ -523,6 +536,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) cpu_port = dsa_to_port(ds, port)->cpu_dp->index; b53_br_egress_floods(ds, port, true, true); + b53_port_set_learning(dev, port, false); if (dev->ops->irq_enable) ret = dev->ops->irq_enable(dev, port); @@ -656,6 +670,7 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port) b53_brcm_hdr_setup(dev->ds, port); b53_br_egress_floods(dev->ds, port, true, true); + b53_port_set_learning(dev, port, false); } static void b53_enable_mib(struct b53_device *dev) @@ -1055,13 +1070,6 @@ static int b53_setup(struct dsa_switch *ds) b53_disable_port(ds, port); } - /* Let DSA handle the case were multiple bridges span the same switch - * device and different VLAN awareness settings are requested, which - * would be breaking filtering semantics for any of the other bridge - * devices. (not hardware supported) - */ - ds->vlan_filtering_is_global = true; - return b53_setup_devlink_resources(ds); } @@ -1839,6 +1847,8 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *br) b53_write16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), pvlan); dev->ports[port].vlan_ctl_mask = pvlan; + b53_port_set_learning(dev, port, true); + return 0; } EXPORT_SYMBOL(b53_br_join); @@ -1886,6 +1896,7 @@ void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *br) vl->untag |= BIT(port) | BIT(cpu_port); b53_set_vlan_entry(dev, pvid, vl); } + b53_port_set_learning(dev, port, false); } EXPORT_SYMBOL(b53_br_leave); @@ -2609,6 +2620,13 @@ struct b53_device *b53_switch_alloc(struct device *base, ds->configure_vlan_while_not_filtering = true; ds->untag_bridge_pvid = true; dev->vlan_enabled = ds->configure_vlan_while_not_filtering; + /* Let DSA handle the case were multiple bridges span the same switch + * device and different VLAN awareness settings are requested, which + * would be breaking filtering semantics for any of the other bridge + * devices. (not hardware supported) + */ + ds->vlan_filtering_is_global = true; + mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex); diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index c90985c294a2..b2c539a42154 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -115,6 +115,7 @@ #define B53_UC_FLOOD_MASK 0x32 #define B53_MC_FLOOD_MASK 0x34 #define B53_IPMC_FLOOD_MASK 0x36 +#define B53_DIS_LEARNING 0x3c /* * Override Ports 0-7 State on devices with xMII interfaces (8 bit) diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 445226720ff2..510324916e91 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -222,23 +222,10 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg &= ~P_TXQ_PSM_VDD(port); core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL); - /* Enable learning */ - reg = core_readl(priv, CORE_DIS_LEARN); - reg &= ~BIT(port); - core_writel(priv, reg, CORE_DIS_LEARN); - /* Enable Broadcom tags for that port if requested */ - if (priv->brcm_tag_mask & BIT(port)) { + if (priv->brcm_tag_mask & BIT(port)) b53_brcm_hdr_setup(ds, port); - /* Disable learning on ASP port */ - if (port == 7) { - reg = core_readl(priv, CORE_DIS_LEARN); - reg |= BIT(port); - core_writel(priv, reg, CORE_DIS_LEARN); - } - } - /* Configure Traffic Class to QoS mapping, allow each priority to map * to a different queue number */ @@ -597,8 +584,10 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) * in bits 15:8 and the patch level in bits 7:0 which is exactly what * the REG_PHY_REVISION register layout is. */ - - return priv->hw_params.gphy_rev; + if (priv->int_phy_mask & BIT(port)) + return priv->hw_params.gphy_rev; + else + return 0; } static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index 45fdb1256dbf..0f1ee4a4fa55 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -654,14 +654,18 @@ static void felix_teardown(struct dsa_switch *ds) struct felix *felix = ocelot_to_felix(ocelot); int port; - if (felix->info->mdio_bus_free) - felix->info->mdio_bus_free(ocelot); - - for (port = 0; port < ocelot->num_phys_ports; port++) - ocelot_deinit_port(ocelot, port); ocelot_deinit_timestamp(ocelot); - /* stop workqueue thread */ ocelot_deinit(ocelot); + + for (port = 0; port < ocelot->num_phys_ports; port++) { + if (dsa_is_unused_port(ds, port)) + continue; + + ocelot_deinit_port(ocelot, port); + } + + if (felix->info->mdio_bus_free) + felix->info->mdio_bus_free(ocelot); } static int felix_hwtstamp_get(struct dsa_switch *ds, int port, diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 4ca029650993..1a855816cbc9 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1834,7 +1834,7 @@ int sja1105_static_config_reload(struct sja1105_private *priv, speed = SPEED_1000; else if (bmcr & BMCR_SPEED100) speed = SPEED_100; - else if (bmcr & BMCR_SPEED10) + else speed = SPEED_10; sja1105_sgmii_pcs_force_speed(priv, speed); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index b40d4377cc71..b2cd3bdba9f8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -1279,10 +1279,18 @@ #define MDIO_PMA_10GBR_FECCTRL 0x00ab #endif +#ifndef MDIO_PMA_RX_CTRL1 +#define MDIO_PMA_RX_CTRL1 0x8051 +#endif + #ifndef MDIO_PCS_DIG_CTRL #define MDIO_PCS_DIG_CTRL 0x8000 #endif +#ifndef MDIO_PCS_DIGITAL_STAT +#define MDIO_PCS_DIGITAL_STAT 0x8010 +#endif + #ifndef MDIO_AN_XNP #define MDIO_AN_XNP 0x0016 #endif @@ -1358,6 +1366,8 @@ #define XGBE_KR_TRAINING_ENABLE BIT(1) #define XGBE_PCS_CL37_BP BIT(12) +#define XGBE_PCS_PSEQ_STATE_MASK 0x1c +#define XGBE_PCS_PSEQ_STATE_POWER_GOOD 0x10 #define XGBE_AN_CL37_INT_CMPLT BIT(0) #define XGBE_AN_CL37_INT_MASK 0x01 @@ -1375,6 +1385,10 @@ #define XGBE_PMA_CDR_TRACK_EN_OFF 0x00 #define XGBE_PMA_CDR_TRACK_EN_ON 0x01 +#define XGBE_PMA_RX_RST_0_MASK BIT(4) +#define XGBE_PMA_RX_RST_0_RESET_ON 0x10 +#define XGBE_PMA_RX_RST_0_RESET_OFF 0x00 + /* Bit setting and getting macros * The get macro will extract the current bit field value from within * the variable diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 2709a2db5657..395eb0b52680 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1368,6 +1368,7 @@ static void xgbe_stop(struct xgbe_prv_data *pdata) return; netif_tx_stop_all_queues(netdev); + netif_carrier_off(pdata->netdev); xgbe_stop_timers(pdata); flush_workqueue(pdata->dev_workqueue); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c index 93ef5a30cb8d..4e97b4869522 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c @@ -1345,7 +1345,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata) &an_restart); if (an_restart) { xgbe_phy_config_aneg(pdata); - return; + goto adjust_link; } if (pdata->phy.link) { @@ -1396,7 +1396,6 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) pdata->phy_if.phy_impl.stop(pdata); pdata->phy.link = 0; - netif_carrier_off(pdata->netdev); xgbe_phy_adjust_link(pdata); } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 859ded0c06b0..18e48b3bc402 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -922,6 +922,9 @@ static bool xgbe_phy_belfuse_phy_quirks(struct xgbe_prv_data *pdata) if ((phy_id & 0xfffffff0) != 0x03625d10) return false; + /* Reset PHY - wait for self-clearing reset bit to clear */ + genphy_soft_reset(phy_data->phydev); + /* Disable RGMII mode */ phy_write(phy_data->phydev, 0x18, 0x7007); reg = phy_read(phy_data->phydev, 0x18); @@ -1953,6 +1956,27 @@ static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata) xgbe_phy_put_comm_ownership(pdata); } +static void xgbe_phy_rx_reset(struct xgbe_prv_data *pdata) +{ + int reg; + + reg = XMDIO_READ_BITS(pdata, MDIO_MMD_PCS, MDIO_PCS_DIGITAL_STAT, + XGBE_PCS_PSEQ_STATE_MASK); + if (reg == XGBE_PCS_PSEQ_STATE_POWER_GOOD) { + /* Mailbox command timed out, reset of RX block is required. + * This can be done by asseting the reset bit and wait for + * its compeletion. + */ + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_ON); + ndelay(20); + XMDIO_WRITE_BITS(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_RX_CTRL1, + XGBE_PMA_RX_RST_0_MASK, XGBE_PMA_RX_RST_0_RESET_OFF); + usleep_range(40, 50); + netif_err(pdata, link, pdata->netdev, "firmware mailbox reset performed\n"); + } +} + static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int cmd, unsigned int sub_cmd) { @@ -1960,9 +1984,11 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, unsigned int wait; /* Log if a previous command did not complete */ - if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) + if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) { netif_dbg(pdata, link, pdata->netdev, "firmware mailbox not ready for command\n"); + xgbe_phy_rx_reset(pdata); + } /* Construct the command */ XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, cmd); @@ -1984,6 +2010,9 @@ static void xgbe_phy_perform_ratechange(struct xgbe_prv_data *pdata, netif_dbg(pdata, link, pdata->netdev, "firmware mailbox command did not complete\n"); + + /* Reset on error */ + xgbe_phy_rx_reset(pdata); } static void xgbe_phy_rrc(struct xgbe_prv_data *pdata) @@ -2584,6 +2613,14 @@ static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart) if (reg & MDIO_STAT1_LSTATUS) return 1; + if (pdata->phy.autoneg == AUTONEG_ENABLE && + phy_data->port_mode == XGBE_PORT_MODE_BACKPLANE) { + if (!test_bit(XGBE_LINK_INIT, &pdata->dev_state)) { + netif_carrier_off(pdata->netdev); + *an_restart = 1; + } + } + /* No link, attempt a receiver reset cycle */ if (phy_data->rrc_count++ > XGBE_RRC_FREQUENCY) { phy_data->rrc_count = 0; diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c index dd5c8a9038bb..a60ce9030581 100644 --- a/drivers/net/ethernet/atheros/ag71xx.c +++ b/drivers/net/ethernet/atheros/ag71xx.c @@ -223,8 +223,6 @@ #define AG71XX_REG_RX_SM 0x01b0 #define AG71XX_REG_TX_SM 0x01b4 -#define ETH_SWITCH_HEADER_LEN 2 - #define AG71XX_DEFAULT_MSG_ENABLE \ (NETIF_MSG_DRV \ | NETIF_MSG_PROBE \ @@ -933,7 +931,7 @@ static void ag71xx_hw_setup(struct ag71xx *ag) static unsigned int ag71xx_max_frame_len(unsigned int mtu) { - return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; + return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN; } static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac) diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 9b7f1af5f574..9e02f8864593 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -1894,13 +1894,16 @@ static int alx_resume(struct device *dev) if (!netif_running(alx->dev)) return 0; - netif_device_attach(alx->dev); rtnl_lock(); err = __alx_open(alx, true); rtnl_unlock(); + if (err) + return err; - return err; + netif_device_attach(alx->dev); + + return 0; } static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index d10e4f85dd11..80819d8fddb4 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -8430,10 +8430,18 @@ static void bnxt_setup_inta(struct bnxt *bp) bp->irq_tbl[0].handler = bnxt_inta; } +static int bnxt_init_int_mode(struct bnxt *bp); + static int bnxt_setup_int_mode(struct bnxt *bp) { int rc; + if (!bp->irq_tbl) { + rc = bnxt_init_int_mode(bp); + if (rc || !bp->irq_tbl) + return rc ?: -ENODEV; + } + if (bp->flags & BNXT_FLAG_USING_MSIX) bnxt_setup_msix(bp); else @@ -8618,7 +8626,7 @@ static int bnxt_init_inta(struct bnxt *bp) static int bnxt_init_int_mode(struct bnxt *bp) { - int rc = 0; + int rc = -ENODEV; if (bp->flags & BNXT_FLAG_MSIX_CAP) rc = bnxt_init_msix(bp); @@ -8856,9 +8864,10 @@ void bnxt_tx_disable(struct bnxt *bp) txr->dev_state = BNXT_DEV_STATE_CLOSING; } } + /* Drop carrier first to prevent TX timeout */ + netif_carrier_off(bp->dev); /* Stop all TX queues */ netif_tx_disable(bp->dev); - netif_carrier_off(bp->dev); } void bnxt_tx_enable(struct bnxt *bp) @@ -9338,7 +9347,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) { struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr; struct hwrm_func_drv_if_change_input req = {0}; - bool resc_reinit = false, fw_reset = false; + bool fw_reset = !bp->irq_tbl; + bool resc_reinit = false; u32 flags = 0; int rc; @@ -9366,6 +9376,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up) if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) { netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n"); + set_bit(BNXT_STATE_ABORT_ERR, &bp->state); return -ENODEV; } if (resc_reinit || fw_reset) { diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 6b7b69ed62db..a9bcf887d2fb 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -472,8 +472,8 @@ static int bnxt_dl_info_get(struct devlink *dl, struct devlink_info_req *req, if (BNXT_PF(bp) && !bnxt_hwrm_get_nvm_cfg_ver(bp, &nvm_cfg_ver)) { u32 ver = nvm_cfg_ver.vu32; - sprintf(buf, "%X.%X.%X", (ver >> 16) & 0xF, (ver >> 8) & 0xF, - ver & 0xF); + sprintf(buf, "%d.%d.%d", (ver >> 16) & 0xf, (ver >> 8) & 0xf, + ver & 0xf); rc = bnxt_dl_info_put(bp, req, BNXT_VERSION_STORED, DEVLINK_INFO_VERSION_GENERIC_FW_PSID, buf); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 814a5b10141d..07cdb38e7d11 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3950,6 +3950,13 @@ static int macb_init(struct platform_device *pdev) return 0; } +static const struct macb_usrio_config macb_default_usrio = { + .mii = MACB_BIT(MII), + .rmii = MACB_BIT(RMII), + .rgmii = GEM_BIT(RGMII), + .refclk = MACB_BIT(CLKEN), +}; + #if defined(CONFIG_OF) /* 1518 rounded up */ #define AT91ETHER_MAX_RBUFF_SZ 0x600 @@ -4435,13 +4442,6 @@ static int fu540_c000_init(struct platform_device *pdev) return macb_init(pdev); } -static const struct macb_usrio_config macb_default_usrio = { - .mii = MACB_BIT(MII), - .rmii = MACB_BIT(RMII), - .rgmii = GEM_BIT(RGMII), - .refclk = MACB_BIT(CLKEN), -}; - static const struct macb_usrio_config sama7g5_usrio = { .mii = 0, .rmii = 1, @@ -4590,6 +4590,7 @@ static const struct macb_config default_gem_config = { .dma_burst_length = 16, .clk_init = macb_clk_init, .init = macb_init, + .usrio = &macb_default_usrio, .jumbo_max_len = 10240, }; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index 1b49f2fa9b18..34546f5312ee 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -46,6 +46,9 @@ #define MAX_ULD_QSETS 16 #define MAX_ULD_NPORTS 4 +/* ulp_mem_io + ulptx_idata + payload + padding */ +#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 196652a114c5..3334c9e2152a 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2842,17 +2842,22 @@ int t4_mgmt_tx(struct adapter *adap, struct sk_buff *skb) * @skb: the packet * * Returns true if a packet can be sent as an offload WR with immediate - * data. We currently use the same limit as for Ethernet packets. + * data. + * FW_OFLD_TX_DATA_WR limits the payload to 255 bytes due to 8-bit field. + * However, FW_ULPTX_WR commands have a 256 byte immediate only + * payload limit. */ static inline int is_ofld_imm(const struct sk_buff *skb) { struct work_request_hdr *req = (struct work_request_hdr *)skb->data; unsigned long opcode = FW_WR_OP_G(ntohl(req->wr_hi)); - if (opcode == FW_CRYPTO_LOOKASIDE_WR) + if (unlikely(opcode == FW_ULPTX_WR)) + return skb->len <= MAX_IMM_ULPTX_WR_LEN; + else if (opcode == FW_CRYPTO_LOOKASIDE_WR) return skb->len <= SGE_MAX_WR_LEN; else - return skb->len <= MAX_IMM_TX_PKT_LEN; + return skb->len <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } /** diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 1b7e8c91b541..423d6d78d15c 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -727,7 +727,7 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input) kvfree(tx_info); return 0; } - tx_info->open_state = false; + tx_info->open_state = CH_KTLS_OPEN_SUCCESS; spin_unlock(&tx_info->lock); complete(&tx_info->completion); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index 47ba81e42f5d..b1161bdeda4d 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -50,9 +50,6 @@ #define MIN_RCV_WND (24 * 1024U) #define LOOPBACK(x) (((x) & htonl(0xff000000)) == htonl(0x7f000000)) -/* ulp_mem_io + ulptx_idata + payload + padding */ -#define MAX_IMM_ULPTX_WR_LEN (32 + 8 + 256 + 8) - /* for TX: a skb must have a headroom of at least TX_HEADER_LEN bytes */ #define TX_HEADER_LEN \ (sizeof(struct fw_ofld_tx_data_wr) + sizeof(struct sge_opaque_hdr)) diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c index 3fdc70dab5c1..252adfa5d837 100644 --- a/drivers/net/ethernet/davicom/dm9000.c +++ b/drivers/net/ethernet/davicom/dm9000.c @@ -133,6 +133,8 @@ struct board_info { u32 wake_state; int ip_summed; + + struct regulator *power_supply; }; /* debug code */ @@ -1449,7 +1451,7 @@ dm9000_probe(struct platform_device *pdev) if (ret) { dev_err(dev, "failed to request reset gpio %d: %d\n", reset_gpios, ret); - return -ENODEV; + goto out_regulator_disable; } /* According to manual PWRST# Low Period Min 1ms */ @@ -1461,8 +1463,10 @@ dm9000_probe(struct platform_device *pdev) if (!pdata) { pdata = dm9000_parse_dt(&pdev->dev); - if (IS_ERR(pdata)) - return PTR_ERR(pdata); + if (IS_ERR(pdata)) { + ret = PTR_ERR(pdata); + goto out_regulator_disable; + } } /* Init network device */ @@ -1479,6 +1483,8 @@ dm9000_probe(struct platform_device *pdev) db->dev = &pdev->dev; db->ndev = ndev; + if (!IS_ERR(power)) + db->power_supply = power; spin_lock_init(&db->lock); mutex_init(&db->addr_lock); @@ -1501,7 +1507,7 @@ dm9000_probe(struct platform_device *pdev) goto out; } - db->irq_wake = platform_get_irq(pdev, 1); + db->irq_wake = platform_get_irq_optional(pdev, 1); if (db->irq_wake >= 0) { dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake); @@ -1703,6 +1709,10 @@ dm9000_probe(struct platform_device *pdev) dm9000_release_board(pdev, db); free_netdev(ndev); +out_regulator_disable: + if (!IS_ERR(power)) + regulator_disable(power); + return ret; } @@ -1760,10 +1770,13 @@ static int dm9000_drv_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); + struct board_info *dm = to_dm9000_board(ndev); unregister_netdev(ndev); - dm9000_release_board(pdev, netdev_priv(ndev)); + dm9000_release_board(pdev, dm); free_netdev(ndev); /* free device structure */ + if (dm->power_supply) + regulator_disable(dm->power_supply); dev_dbg(&pdev->dev, "released and freed device\n"); return 0; diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 88bfe2107938..04421aec2dfd 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -1337,6 +1337,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) */ if (unlikely(priv->need_mac_restart)) { ftgmac100_start_hw(priv); + priv->need_mac_restart = false; /* Re-enable "bad" interrupts */ iowrite32(FTGMAC100_INT_BAD, diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6faa20bed488..9905caeaeee3 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -2672,7 +2672,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, u32 hash; u64 ns; - np = container_of(&portal, struct dpaa_napi_portal, p); dpaa_fq = container_of(fq, struct dpaa_fq, fq_base); fd_status = be32_to_cpu(fd->status); fd_format = qm_fd_get_format(fd); @@ -2687,6 +2686,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal, percpu_priv = this_cpu_ptr(priv->percpu_priv); percpu_stats = &percpu_priv->stats; + np = &percpu_priv->np; if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))) return qman_cb_dqrr_stop; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index fb0bcd18ec0c..f1c2b3c7f7e9 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -399,10 +399,20 @@ static u32 dpaa2_eth_run_xdp(struct dpaa2_eth_priv *priv, xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE; err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog); - if (unlikely(err)) + if (unlikely(err)) { + addr = dma_map_page(priv->net_dev->dev.parent, + virt_to_page(vaddr), 0, + priv->rx_buf_size, DMA_BIDIRECTIONAL); + if (unlikely(dma_mapping_error(priv->net_dev->dev.parent, addr))) { + free_pages((unsigned long)vaddr, 0); + } else { + ch->buf_count++; + dpaa2_eth_xdp_release_buf(priv, ch, addr); + } ch->stats.xdp_drop++; - else + } else { ch->stats.xdp_redirect++; + } break; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index c78d12229730..09471329f3a3 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -281,6 +281,8 @@ static int enetc_poll(struct napi_struct *napi, int budget) int work_done; int i; + enetc_lock_mdio(); + for (i = 0; i < v->count_tx_rings; i++) if (!enetc_clean_tx_ring(&v->tx_ring[i], budget)) complete = false; @@ -291,8 +293,10 @@ static int enetc_poll(struct napi_struct *napi, int budget) if (work_done) v->rx_napi_work = true; - if (!complete) + if (!complete) { + enetc_unlock_mdio(); return budget; + } napi_complete_done(napi, work_done); @@ -301,8 +305,6 @@ static int enetc_poll(struct napi_struct *napi, int budget) v->rx_napi_work = false; - enetc_lock_mdio(); - /* enable interrupts */ enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE); @@ -327,8 +329,8 @@ static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd, { u32 lo, hi, tstamp_lo; - lo = enetc_rd(hw, ENETC_SICTR0); - hi = enetc_rd(hw, ENETC_SICTR1); + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = enetc_rd_hot(hw, ENETC_SICTR1); tstamp_lo = le32_to_cpu(txbd->wb.tstamp); if (lo <= tstamp_lo) hi -= 1; @@ -342,6 +344,12 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp) if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) { memset(&shhwtstamps, 0, sizeof(shhwtstamps)); shhwtstamps.hwtstamp = ns_to_ktime(tstamp); + /* Ensure skb_mstamp_ns, which might have been populated with + * the txtime, is not mistaken for a software timestamp, + * because this will prevent the dispatch of our hardware + * timestamp to the socket. + */ + skb->tstamp = ktime_set(0, 0); skb_tstamp_tx(skb, &shhwtstamps); } } @@ -358,9 +366,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) i = tx_ring->next_to_clean; tx_swbd = &tx_ring->tx_swbd[i]; - enetc_lock_mdio(); bds_to_clean = enetc_bd_ready_count(tx_ring, i); - enetc_unlock_mdio(); do_tstamp = false; @@ -403,8 +409,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) tx_swbd = tx_ring->tx_swbd; } - enetc_lock_mdio(); - /* BD iteration loop end */ if (is_eof) { tx_frm_cnt++; @@ -415,8 +419,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (unlikely(!bds_to_clean)) bds_to_clean = enetc_bd_ready_count(tx_ring, i); - - enetc_unlock_mdio(); } tx_ring->next_to_clean = i; @@ -527,9 +529,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev, static void enetc_get_offloads(struct enetc_bdr *rx_ring, union enetc_rx_bd *rxbd, struct sk_buff *skb) { -#ifdef CONFIG_FSL_ENETC_PTP_CLOCK struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev); -#endif + /* TODO: hashing */ if (rx_ring->ndev->features & NETIF_F_RXCSUM) { u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum); @@ -538,12 +539,31 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, skb->ip_summed = CHECKSUM_COMPLETE; } - /* copy VLAN to skb, if one is extracted, for now we assume it's a - * standard TPID, but HW also supports custom values - */ - if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - le16_to_cpu(rxbd->r.vlan_opt)); + if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) { + __be16 tpid = 0; + + switch (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TPID) { + case 0: + tpid = htons(ETH_P_8021Q); + break; + case 1: + tpid = htons(ETH_P_8021AD); + break; + case 2: + tpid = htons(enetc_port_rd(&priv->si->hw, + ENETC_PCVLANR1)); + break; + case 3: + tpid = htons(enetc_port_rd(&priv->si->hw, + ENETC_PCVLANR2)); + break; + default: + break; + } + + __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt)); + } + #ifdef CONFIG_FSL_ENETC_PTP_CLOCK if (priv->active_offloads & ENETC_F_RX_TSTAMP) enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); @@ -660,8 +680,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, u32 bd_status; u16 size; - enetc_lock_mdio(); - if (cleaned_cnt >= ENETC_RXBD_BUNDLE) { int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt); @@ -672,19 +690,15 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, rxbd = enetc_rxbd(rx_ring, i); bd_status = le32_to_cpu(rxbd->r.lstatus); - if (!bd_status) { - enetc_unlock_mdio(); + if (!bd_status) break; - } enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index)); dma_rmb(); /* for reading other rxbd fields */ size = le16_to_cpu(rxbd->r.buf_len); skb = enetc_map_rx_buff_to_skb(rx_ring, i, size); - if (!skb) { - enetc_unlock_mdio(); + if (!skb) break; - } enetc_get_offloads(rx_ring, rxbd, skb); @@ -696,7 +710,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, if (unlikely(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) { - enetc_unlock_mdio(); dev_kfree_skb(skb); while (!(bd_status & ENETC_RXBD_LSTATUS_F)) { dma_rmb(); @@ -736,8 +749,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring, enetc_process_skb(rx_ring, skb); - enetc_unlock_mdio(); - napi_gro_receive(napi, skb); rx_frm_cnt++; @@ -984,7 +995,7 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv) enetc_free_tx_ring(priv->tx_ring[i]); } -static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) { int size = cbdr->bd_count * sizeof(struct enetc_cbd); @@ -1005,7 +1016,7 @@ static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr) return 0; } -static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) +void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) { int size = cbdr->bd_count * sizeof(struct enetc_cbd); @@ -1013,7 +1024,7 @@ static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr) cbdr->bd_base = NULL; } -static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) +void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) { /* set CBDR cache attributes */ enetc_wr(hw, ENETC_SICAR2, @@ -1033,7 +1044,7 @@ static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr) cbdr->cir = hw->reg + ENETC_SICBDRCIR; } -static void enetc_clear_cbdr(struct enetc_hw *hw) +void enetc_clear_cbdr(struct enetc_hw *hw) { enetc_wr(hw, ENETC_SICBDRMR, 0); } @@ -1058,13 +1069,12 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups) return 0; } -static int enetc_configure_si(struct enetc_ndev_priv *priv) +int enetc_configure_si(struct enetc_ndev_priv *priv) { struct enetc_si *si = priv->si; struct enetc_hw *hw = &si->hw; int err; - enetc_setup_cbdr(hw, &si->cbd_ring); /* set SI cache attributes */ enetc_wr(hw, ENETC_SICAR0, ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT); @@ -1112,6 +1122,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) if (err) return err; + enetc_setup_cbdr(&si->hw, &si->cbd_ring); + priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules), GFP_KERNEL); if (!priv->cls_rules) { @@ -1119,14 +1131,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv) goto err_alloc_cls; } - err = enetc_configure_si(priv); - if (err) - goto err_config_si; - return 0; -err_config_si: - kfree(priv->cls_rules); err_alloc_cls: enetc_clear_cbdr(&si->hw); enetc_free_cbdr(priv->dev, &si->cbd_ring); @@ -1212,7 +1218,8 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring) rx_ring->idr = hw->reg + ENETC_SIRXIDR; enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring)); - enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use); + /* update ENETC's consumer index */ + enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use); /* enable ring */ enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr); diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 8532d23b54f5..8b380fc13314 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -292,6 +292,7 @@ void enetc_get_si_caps(struct enetc_si *si); void enetc_init_si_rings_params(struct enetc_ndev_priv *priv); int enetc_alloc_si_resources(struct enetc_ndev_priv *priv); void enetc_free_si_resources(struct enetc_ndev_priv *priv); +int enetc_configure_si(struct enetc_ndev_priv *priv); int enetc_open(struct net_device *ndev); int enetc_close(struct net_device *ndev); @@ -309,6 +310,10 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type, void enetc_set_ethtool_ops(struct net_device *ndev); /* control buffer descriptor ring (CBDR) */ +int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr); +void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr); +void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr); +void enetc_clear_cbdr(struct enetc_hw *hw); int enetc_set_mac_flt_entry(struct enetc_si *si, int index, char *mac_addr, int si_map); int enetc_clear_mac_flt_entry(struct enetc_si *si, int index); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index c71fe8d751d5..00938f7960a4 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -172,6 +172,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PSIPMAR0(n) (0x0100 + (n) * 0x8) /* n = SI index */ #define ENETC_PSIPMAR1(n) (0x0104 + (n) * 0x8) #define ENETC_PVCLCTR 0x0208 +#define ENETC_PCVLANR1 0x0210 +#define ENETC_PCVLANR2 0x0214 #define ENETC_VLAN_TYPE_C BIT(0) #define ENETC_VLAN_TYPE_S BIT(1) #define ENETC_PVCLCTR_OVTPIDL(bmp) ((bmp) & 0xff) /* VLAN_TYPE */ @@ -232,14 +234,23 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_MAXFRM 0x8014 #define ENETC_SET_TX_MTU(val) ((val) << 16) #define ENETC_SET_MAXFRM(val) ((val) & 0xffff) +#define ENETC_PM0_RX_FIFO 0x801c +#define ENETC_PM0_RX_FIFO_VAL 1 #define ENETC_PM_IMDIO_BASE 0x8030 #define ENETC_PM0_IF_MODE 0x8300 -#define ENETC_PMO_IFM_RG BIT(2) +#define ENETC_PM0_IFM_RG BIT(2) #define ENETC_PM0_IFM_RLP (BIT(5) | BIT(11)) -#define ENETC_PM0_IFM_RGAUTO (BIT(15) | ENETC_PMO_IFM_RG | BIT(1)) -#define ENETC_PM0_IFM_XGMII BIT(12) +#define ENETC_PM0_IFM_EN_AUTO BIT(15) +#define ENETC_PM0_IFM_SSP_MASK GENMASK(14, 13) +#define ENETC_PM0_IFM_SSP_1000 (2 << 13) +#define ENETC_PM0_IFM_SSP_100 (0 << 13) +#define ENETC_PM0_IFM_SSP_10 (1 << 13) +#define ENETC_PM0_IFM_FULL_DPX BIT(12) +#define ENETC_PM0_IFM_IFMODE_MASK GENMASK(1, 0) +#define ENETC_PM0_IFM_IFMODE_XGMII 0 +#define ENETC_PM0_IFM_IFMODE_GMII 2 #define ENETC_PSIDCAPR 0x1b08 #define ENETC_PSIDCAPR_MSK GENMASK(15, 0) #define ENETC_PSFCAPR 0x1b18 @@ -453,6 +464,8 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg) #define enetc_wr_reg(reg, val) _enetc_wr_reg_wa((reg), (val)) #define enetc_rd(hw, off) enetc_rd_reg((hw)->reg + (off)) #define enetc_wr(hw, off, val) enetc_wr_reg((hw)->reg + (off), val) +#define enetc_rd_hot(hw, off) enetc_rd_reg_hot((hw)->reg + (off)) +#define enetc_wr_hot(hw, off, val) enetc_wr_reg_hot((hw)->reg + (off), val) #define enetc_rd64(hw, off) _enetc_rd_reg64_wa((hw)->reg + (off)) /* port register accessors - PF only */ #define enetc_port_rd(hw, off) enetc_rd_reg((hw)->port + (off)) @@ -568,6 +581,7 @@ union enetc_rx_bd { #define ENETC_RXBD_LSTATUS(flags) ((flags) << 16) #define ENETC_RXBD_FLAG_VLAN BIT(9) #define ENETC_RXBD_FLAG_TSTMP BIT(10) +#define ENETC_RXBD_FLAG_TPID GENMASK(1, 0) #define ENETC_MAC_ADDR_FILT_CNT 8 /* # of supported entries per port */ #define EMETC_MAC_ADDR_FILT_RES 3 /* # of reserved entries at the beginning */ diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c index 3eb5f1375bd4..224fc37a6757 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c @@ -190,7 +190,6 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) { struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_pf *pf = enetc_si_priv(priv->si); - char vlan_promisc_simap = pf->vlan_promisc_simap; struct enetc_hw *hw = &priv->si->hw; bool uprom = false, mprom = false; struct enetc_mac_filter *filter; @@ -203,16 +202,12 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev) psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0); uprom = true; mprom = true; - /* Enable VLAN promiscuous mode for SI0 (PF) */ - vlan_promisc_simap |= BIT(0); } else if (ndev->flags & IFF_ALLMULTI) { /* enable multi cast promisc mode for SI0 (PF) */ psipmr = ENETC_PSIPMR_SET_MP(0); mprom = true; } - enetc_set_vlan_promisc(&pf->si->hw, vlan_promisc_simap); - /* first 2 filter entries belong to PF */ if (!uprom) { /* Update unicast filters */ @@ -320,7 +315,7 @@ static void enetc_set_loopback(struct net_device *ndev, bool en) u32 reg; reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE); - if (reg & ENETC_PMO_IFM_RG) { + if (reg & ENETC_PM0_IFM_RG) { /* RGMII mode */ reg = (reg & ~ENETC_PM0_IFM_RLP) | (en ? ENETC_PM0_IFM_RLP : 0); @@ -495,17 +490,30 @@ static void enetc_configure_port_mac(struct enetc_hw *hw) enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN | ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC); + + /* On LS1028A, the MAC RX FIFO defaults to 2, which is too high + * and may lead to RX lock-up under traffic. Set it to 1 instead, + * as recommended by the hardware team. + */ + enetc_port_wr(hw, ENETC_PM0_RX_FIFO, ENETC_PM0_RX_FIFO_VAL); } static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode) { - /* set auto-speed for RGMII */ - if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG || - phy_interface_mode_is_rgmii(phy_mode)) - enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO); + u32 val; - if (phy_mode == PHY_INTERFACE_MODE_USXGMII) - enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII); + if (phy_interface_mode_is_rgmii(phy_mode)) { + val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); + val &= ~ENETC_PM0_IFM_EN_AUTO; + val &= ENETC_PM0_IFM_IFMODE_MASK; + val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG; + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); + } + + if (phy_mode == PHY_INTERFACE_MODE_USXGMII) { + val = ENETC_PM0_IFM_FULL_DPX | ENETC_PM0_IFM_IFMODE_XGMII; + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); + } } static void enetc_mac_enable(struct enetc_hw *hw, bool en) @@ -937,6 +945,34 @@ static void enetc_pl_mac_config(struct phylink_config *config, phylink_set_pcs(priv->phylink, &pf->pcs->pcs); } +static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex) +{ + u32 old_val, val; + + old_val = val = enetc_port_rd(hw, ENETC_PM0_IF_MODE); + + if (speed == SPEED_1000) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_1000; + } else if (speed == SPEED_100) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_100; + } else if (speed == SPEED_10) { + val &= ~ENETC_PM0_IFM_SSP_MASK; + val |= ENETC_PM0_IFM_SSP_10; + } + + if (duplex == DUPLEX_FULL) + val |= ENETC_PM0_IFM_FULL_DPX; + else + val &= ~ENETC_PM0_IFM_FULL_DPX; + + if (val == old_val) + return; + + enetc_port_wr(hw, ENETC_PM0_IF_MODE, val); +} + static void enetc_pl_mac_link_up(struct phylink_config *config, struct phy_device *phy, unsigned int mode, phy_interface_t interface, int speed, @@ -949,6 +985,10 @@ static void enetc_pl_mac_link_up(struct phylink_config *config, if (priv->active_offloads & ENETC_F_QBV) enetc_sched_speed_set(priv, speed); + if (!phylink_autoneg_inband(mode) && + phy_interface_mode_is_rgmii(interface)) + enetc_force_rgmii_mac(&pf->si->hw, speed, duplex); + enetc_mac_enable(&pf->si->hw, true); } @@ -1041,6 +1081,26 @@ static int enetc_init_port_rss_memory(struct enetc_si *si) return err; } +static void enetc_init_unused_port(struct enetc_si *si) +{ + struct device *dev = &si->pdev->dev; + struct enetc_hw *hw = &si->hw; + int err; + + si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE; + err = enetc_alloc_cbdr(dev, &si->cbd_ring); + if (err) + return; + + enetc_setup_cbdr(hw, &si->cbd_ring); + + enetc_init_port_rfs_memory(si); + enetc_init_port_rss_memory(si); + + enetc_clear_cbdr(hw); + enetc_free_cbdr(dev, &si->cbd_ring); +} + static int enetc_pf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -1051,11 +1111,6 @@ static int enetc_pf_probe(struct pci_dev *pdev, struct enetc_pf *pf; int err; - if (node && !of_device_is_available(node)) { - dev_info(&pdev->dev, "device is disabled, skipping\n"); - return -ENODEV; - } - err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf)); if (err) { dev_err(&pdev->dev, "PCI probing failed\n"); @@ -1069,6 +1124,13 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_map_pf_space; } + if (node && !of_device_is_available(node)) { + enetc_init_unused_port(si); + dev_info(&pdev->dev, "device is disabled, skipping\n"); + err = -ENODEV; + goto err_device_disabled; + } + pf = enetc_si_priv(si); pf->si = si; pf->total_vfs = pci_sriov_get_totalvfs(pdev); @@ -1108,6 +1170,12 @@ static int enetc_pf_probe(struct pci_dev *pdev, goto err_init_port_rss; } + err = enetc_configure_si(priv); + if (err) { + dev_err(&pdev->dev, "Failed to configure SI\n"); + goto err_config_si; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -1136,6 +1204,7 @@ static int enetc_pf_probe(struct pci_dev *pdev, enetc_mdiobus_destroy(pf); err_mdiobus_create: enetc_free_msix(priv); +err_config_si: err_init_port_rss: err_init_port_rfs: err_alloc_msix: @@ -1144,6 +1213,7 @@ static int enetc_pf_probe(struct pci_dev *pdev, si->ndev = NULL; free_netdev(ndev); err_alloc_netdev: +err_device_disabled: err_map_pf_space: enetc_pci_remove(pdev); @@ -1157,14 +1227,15 @@ static void enetc_pf_remove(struct pci_dev *pdev) struct enetc_ndev_priv *priv; priv = netdev_priv(si->ndev); - enetc_phylink_destroy(priv); - enetc_mdiobus_destroy(pf); if (pf->num_vfs) enetc_sriov_configure(pdev, 0); unregister_netdev(si->ndev); + enetc_phylink_destroy(priv); + enetc_mdiobus_destroy(pf); + enetc_free_msix(priv); enetc_free_si_resources(priv); diff --git a/drivers/net/ethernet/freescale/enetc/enetc_vf.c b/drivers/net/ethernet/freescale/enetc/enetc_vf.c index 39c1a09e69a9..9b755a84c2d6 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_vf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_vf.c @@ -171,6 +171,12 @@ static int enetc_vf_probe(struct pci_dev *pdev, goto err_alloc_si_res; } + err = enetc_configure_si(priv); + if (err) { + dev_err(&pdev->dev, "Failed to configure SI\n"); + goto err_config_si; + } + err = enetc_alloc_msix(priv); if (err) { dev_err(&pdev->dev, "MSIX alloc failed\n"); @@ -187,6 +193,7 @@ static int enetc_vf_probe(struct pci_dev *pdev, err_reg_netdev: enetc_free_msix(priv); +err_config_si: err_alloc_msix: enetc_free_si_resources(priv); err_alloc_si_res: diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index 2e344aada4c6..1753807cbf97 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -377,9 +377,16 @@ static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) u64 ns; unsigned long flags; + mutex_lock(&adapter->ptp_clk_mutex); + /* Check the ptp clock */ + if (!adapter->ptp_clk_on) { + mutex_unlock(&adapter->ptp_clk_mutex); + return -EINVAL; + } spin_lock_irqsave(&adapter->tmreg_lock, flags); ns = timecounter_read(&adapter->tc); spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + mutex_unlock(&adapter->ptp_clk_mutex); *ts = ns_to_timespec64(ns); diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index d391a45cebb6..4fab2ee5bbf5 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2391,6 +2391,10 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, if (lstatus & BD_LFLAG(RXBD_LAST)) size -= skb->len; + WARN(size < 0, "gianfar: rx fragment size underflow"); + if (size < 0) + return false; + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, rxb->page_offset + RXBUF_ALIGNMENT, size, GFAR_RXB_TRUESIZE); @@ -2553,6 +2557,17 @@ static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, if (lstatus & BD_LFLAG(RXBD_EMPTY)) break; + /* lost RXBD_LAST descriptor due to overrun */ + if (skb && + (lstatus & BD_LFLAG(RXBD_FIRST))) { + /* discard faulty buffer */ + dev_kfree_skb(skb); + skb = NULL; + rx_queue->stats.rx_dropped++; + + /* can continue normally */ + } + /* order rx buffer descriptor reads */ rmb(); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index 858cb293152a..8bce5f1510be 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -1663,8 +1663,10 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev) for (j = 0; j < fetch_num; j++) { /* alloc one skb and init */ skb = hns_assemble_skb(ndev); - if (!skb) + if (!skb) { + ret = -ENOMEM; goto out; + } rd = &tx_ring_data(priv, skb->queue_mapping); hns_nic_net_xmit_hw(ndev, skb, rd); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h index edfadb5cb1c3..a731f207b4f1 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h @@ -1048,16 +1048,16 @@ struct hclge_fd_tcam_config_3_cmd { #define HCLGE_FD_AD_DROP_B 0 #define HCLGE_FD_AD_DIRECT_QID_B 1 #define HCLGE_FD_AD_QID_S 2 -#define HCLGE_FD_AD_QID_M GENMASK(12, 2) +#define HCLGE_FD_AD_QID_M GENMASK(11, 2) #define HCLGE_FD_AD_USE_COUNTER_B 12 #define HCLGE_FD_AD_COUNTER_NUM_S 13 #define HCLGE_FD_AD_COUNTER_NUM_M GENMASK(20, 13) #define HCLGE_FD_AD_NXT_STEP_B 20 #define HCLGE_FD_AD_NXT_KEY_S 21 -#define HCLGE_FD_AD_NXT_KEY_M GENMASK(26, 21) +#define HCLGE_FD_AD_NXT_KEY_M GENMASK(25, 21) #define HCLGE_FD_AD_WR_RULE_ID_B 0 #define HCLGE_FD_AD_RULE_ID_S 1 -#define HCLGE_FD_AD_RULE_ID_M GENMASK(13, 1) +#define HCLGE_FD_AD_RULE_ID_M GENMASK(12, 1) #define HCLGE_FD_AD_TC_OVRD_B 16 #define HCLGE_FD_AD_TC_SIZE_S 17 #define HCLGE_FD_AD_TC_SIZE_M GENMASK(20, 17) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 48549db23c52..67764d930435 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -5194,9 +5194,9 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y, case BIT(INNER_SRC_MAC): for (i = 0; i < ETH_ALEN; i++) { calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples.src_mac[i]); + rule->tuples_mask.src_mac[i]); calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i], - rule->tuples.src_mac[i]); + rule->tuples_mask.src_mac[i]); } return true; @@ -6283,8 +6283,7 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs, fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1); fs->m_ext.vlan_tci = rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ? - cpu_to_be16(VLAN_VID_MASK) : - cpu_to_be16(rule->tuples_mask.vlan_tag1); + 0 : cpu_to_be16(rule->tuples_mask.vlan_tag1); } if (fs->flow_type & FLOW_MAC_EXT) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index a536fdbf05e1..3552c4485ed5 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -247,8 +247,13 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, if (!ltb->buff) return; + /* VIOS automatically unmaps the long term buffer at remote + * end for the following resets: + * FAILOVER, MOBILITY, TIMEOUT. + */ if (adapter->reset_reason != VNIC_RESET_FAILOVER && - adapter->reset_reason != VNIC_RESET_MOBILITY) + adapter->reset_reason != VNIC_RESET_MOBILITY && + adapter->reset_reason != VNIC_RESET_TIMEOUT) send_request_unmap(adapter, ltb->map_id); dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); } @@ -1353,10 +1358,8 @@ static int __ibmvnic_close(struct net_device *netdev) adapter->state = VNIC_CLOSING; rc = set_link_state(adapter, IBMVNIC_LOGICAL_LNK_DN); - if (rc) - return rc; adapter->state = VNIC_CLOSED; - return 0; + return rc; } static int ibmvnic_close(struct net_device *netdev) @@ -1702,6 +1705,9 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) skb_copy_from_linear_data(skb, dst, skb->len); } + /* post changes to long_term_buff *dst before VIOS accessing it */ + dma_wmb(); + tx_pool->consumer_index = (tx_pool->consumer_index + 1) % tx_pool->num_buffers; @@ -1917,10 +1923,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - if (adapter->state != VNIC_PROBED) { - ether_addr_copy(adapter->mac_addr, addr->sa_data); + ether_addr_copy(adapter->mac_addr, addr->sa_data); + if (adapter->state != VNIC_PROBED) rc = __ibmvnic_set_mac(netdev, addr->sa_data); - } return rc; } @@ -2389,6 +2394,8 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, unsigned long flags; int ret; + spin_lock_irqsave(&adapter->rwi_lock, flags); + /* * If failover is pending don't schedule any other reset. * Instead let the failover complete. If there is already a @@ -2409,14 +2416,11 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, goto err; } - spin_lock_irqsave(&adapter->rwi_lock, flags); - list_for_each(entry, &adapter->rwi_list) { tmp = list_entry(entry, struct ibmvnic_rwi, list); if (tmp->reset_reason == reason) { netdev_dbg(netdev, "Skipping matching reset, reason=%d\n", reason); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); ret = EBUSY; goto err; } @@ -2424,8 +2428,6 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, rwi = kzalloc(sizeof(*rwi), GFP_ATOMIC); if (!rwi) { - spin_unlock_irqrestore(&adapter->rwi_lock, flags); - ibmvnic_close(netdev); ret = ENOMEM; goto err; } @@ -2438,12 +2440,17 @@ static int ibmvnic_reset(struct ibmvnic_adapter *adapter, } rwi->reset_reason = reason; list_add_tail(&rwi->list, &adapter->rwi_list); - spin_unlock_irqrestore(&adapter->rwi_lock, flags); netdev_dbg(adapter->netdev, "Scheduling reset (reason %d)\n", reason); schedule_work(&adapter->ibmvnic_reset); - return 0; + ret = 0; err: + /* ibmvnic_close() below can block, so drop the lock first */ + spin_unlock_irqrestore(&adapter->rwi_lock, flags); + + if (ret == ENOMEM) + ibmvnic_close(netdev); + return -ret; } @@ -2541,6 +2548,8 @@ static int ibmvnic_poll(struct napi_struct *napi, int budget) offset = be16_to_cpu(next->rx_comp.off_frame_data); flags = next->rx_comp.flags; skb = rx_buff->skb; + /* load long_term_buff before copying to skb */ + dma_rmb(); skb_copy_to_linear_data(skb, rx_buff->data + offset, length); @@ -5273,16 +5282,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) { struct device *dev = &adapter->vdev->dev; unsigned long timeout = msecs_to_jiffies(20000); - u64 old_num_rx_queues, old_num_tx_queues; + u64 old_num_rx_queues = adapter->req_rx_queues; + u64 old_num_tx_queues = adapter->req_tx_queues; int rc; adapter->from_passive_init = false; - if (reset) { - old_num_rx_queues = adapter->req_rx_queues; - old_num_tx_queues = adapter->req_tx_queues; + if (reset) reinit_completion(&adapter->init_done); - } adapter->init_done_rc = 0; rc = ibmvnic_send_crq_init(adapter); @@ -5459,7 +5466,18 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); + + /* If ibmvnic_reset() is scheduling a reset, wait for it to + * finish. Then, set the state to REMOVING to prevent it from + * scheduling any more work and to have reset functions ignore + * any resets that have already been scheduled. Drop the lock + * after setting state, so __ibmvnic_reset() which is called + * from the flush_work() below, can make progress. + */ + spin_lock(&adapter->rwi_lock); adapter->state = VNIC_REMOVING; + spin_unlock(&adapter->rwi_lock); + spin_unlock_irqrestore(&adapter->state_lock, flags); flush_work(&adapter->ibmvnic_reset); diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index c09c3f6bba9f..72fea3b1c87d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -31,7 +31,7 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_MAX_IND_DESCS 16 #define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 @@ -1081,6 +1081,7 @@ struct ibmvnic_adapter { struct tasklet_struct tasklet; enum vnic_state state; enum ibmvnic_reset_reason reset_reason; + /* when taking both state and rwi locks, take state lock first */ spinlock_t rwi_lock; struct list_head rwi_list; struct work_struct ibmvnic_reset; @@ -1097,6 +1098,8 @@ struct ibmvnic_adapter { struct ibmvnic_tunables desired; struct ibmvnic_tunables fallback; - /* Used for serializatin of state field */ + /* Used for serialization of state field. When taking both state + * and rwi locks, take state lock first. + */ spinlock_t state_lock; }; diff --git a/drivers/net/ethernet/intel/e1000e/82571.c b/drivers/net/ethernet/intel/e1000e/82571.c index 88faf05e23ba..0b1e890dd583 100644 --- a/drivers/net/ethernet/intel/e1000e/82571.c +++ b/drivers/net/ethernet/intel/e1000e/82571.c @@ -899,6 +899,8 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active) } else { data &= ~IGP02E1000_PM_D0_LPLU; ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data); + if (ret_val) + return ret_val; /* LPLU and SmartSpeed are mutually exclusive. LPLU is used * during Dx states where the power conservation is most * important. During driver activity we should enable diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index e9b82c209c2d..a0948002ddf8 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5974,15 +5974,19 @@ static void e1000_reset_task(struct work_struct *work) struct e1000_adapter *adapter; adapter = container_of(work, struct e1000_adapter, reset_task); + rtnl_lock(); /* don't run the task if already down */ - if (test_bit(__E1000_DOWN, &adapter->state)) + if (test_bit(__E1000_DOWN, &adapter->state)) { + rtnl_unlock(); return; + } if (!(adapter->flags & FLAG_RESTART_NOW)) { e1000e_dump(adapter); e_err("Reset adapter unexpectedly\n"); } e1000e_reinit_locked(adapter); + rtnl_unlock(); } /** diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index 26ba1f3eb2d8..9e81f85ee2d8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -4878,7 +4878,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) enum i40e_admin_queue_err adq_err; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; - bool is_reset_needed; + u32 reset_needed = 0; i40e_status status; u32 i, j; @@ -4923,9 +4923,11 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) flags_complete: changed_flags = orig_flags ^ new_flags; - is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | - I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED | - I40E_FLAG_DISABLE_FW_LLDP)); + if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP) + reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG; + if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED | + I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED)) + reset_needed = BIT(__I40E_PF_RESET_REQUESTED); /* Before we finalize any flag changes, we need to perform some * checks to ensure that the changes are supported and safe. @@ -5057,7 +5059,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) case I40E_AQ_RC_EEXIST: dev_warn(&pf->pdev->dev, "FW LLDP agent is already running\n"); - is_reset_needed = false; + reset_needed = 0; break; case I40E_AQ_RC_EPERM: dev_warn(&pf->pdev->dev, @@ -5086,8 +5088,8 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) /* Issue reset to cause things to take effect, as additional bits * are added we will need to create a mask of bits requiring reset */ - if (is_reset_needed) - i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true); + if (reset_needed) + i40e_do_reset(pf, reset_needed, true); return 0; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 1db482d310c2..4a2d03cada01 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -2616,7 +2616,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) return; if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state)) return; - if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) { + if (test_bit(__I40E_VF_DISABLE, pf->state)) { set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state); return; } @@ -2634,7 +2634,6 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf) } } } - clear_bit(__I40E_VF_DISABLE, pf->state); } /** @@ -5921,7 +5920,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid, ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc; ch->seid = ctxt.seid; ch->vsi_number = ctxt.vsi_number; - ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx); + ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx); /* copy just the sections touched not the entire info * since not all sections are valid as returned by @@ -7600,8 +7599,8 @@ static inline void i40e_set_cld_element(struct i40e_cloud_filter *filter, struct i40e_aqc_cloud_filters_element_data *cld) { - int i, j; u32 ipa; + int i; memset(cld, 0, sizeof(*cld)); ether_addr_copy(cld->outer_mac, filter->dst_mac); @@ -7612,14 +7611,14 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter, if (filter->n_proto == ETH_P_IPV6) { #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1) - for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6); - i++, j += 2) { + for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) { ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]); - ipa = cpu_to_le32(ipa); - memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa)); + + *(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa); } } else { ipa = be32_to_cpu(filter->dst_ipv4); + memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa)); } @@ -7667,6 +7666,8 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi, if (filter->flags >= ARRAY_SIZE(flag_table)) return I40E_ERR_CONFIG; + memset(&cld_filter, 0, sizeof(cld_filter)); + /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter); @@ -7730,10 +7731,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, return -EOPNOTSUPP; /* adding filter using src_port/src_ip is not supported at this stage */ - if (filter->src_port || filter->src_ipv4 || + if (filter->src_port || + (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.src_ip6)) return -EOPNOTSUPP; + memset(&cld_filter, 0, sizeof(cld_filter)); + /* copy element needed to add cloud filter from filter */ i40e_set_cld_element(filter, &cld_filter.element); @@ -7757,7 +7761,7 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi, cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT); } - } else if (filter->dst_ipv4 || + } else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) || !ipv6_addr_any(&filter->ip.v6.dst_ip6)) { cld_filter.element.flags = cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT); @@ -8533,11 +8537,6 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) dev_dbg(&pf->pdev->dev, "PFR requested\n"); i40e_handle_reset_warning(pf, lock_acquired); - dev_info(&pf->pdev->dev, - pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? - "FW LLDP is disabled\n" : - "FW LLDP is enabled\n"); - } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) { /* Request a PF Reset * @@ -8545,6 +8544,10 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired) */ i40e_prep_for_reset(pf, lock_acquired); i40e_reset_and_rebuild(pf, true, lock_acquired); + dev_info(&pf->pdev->dev, + pf->flags & I40E_FLAG_DISABLE_FW_LLDP ? + "FW LLDP is disabled\n" : + "FW LLDP is enabled\n"); } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) { int v; @@ -10001,7 +10004,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state); struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi]; struct i40e_hw *hw = &pf->hw; - u8 set_fc_aq_fail = 0; i40e_status ret; u32 val; int v; @@ -10127,13 +10129,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired) i40e_stat_str(&pf->hw, ret), i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - /* make sure our flow control settings are restored */ - ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true); - if (ret) - dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - /* Rebuild the VSIs and VEBs that existed before reset. * They are still in our local switch element arrays, so only * need to rebuild the switch model in the HW. @@ -11709,6 +11704,8 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf) struct i40e_aqc_configure_partition_bw_data bw_data; i40e_status status; + memset(&bw_data, 0, sizeof(bw_data)); + /* Set the valid bit for this PF */ bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id)); bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK; @@ -14714,7 +14711,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) int err; u32 val; u32 i; - u8 set_fc_aq_fail; err = pci_enable_device_mem(pdev); if (err) @@ -15048,24 +15044,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list); - /* Make sure flow control is set according to current settings */ - err = i40e_set_fc(hw, &set_fc_aq_fail, true); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on get_phy_cap\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on set_phy_config\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE) - dev_dbg(&pf->pdev->dev, - "Set fc with err %s aq_err %s on get_link_info\n", - i40e_stat_str(hw, err), - i40e_aq_str(hw, hw->aq.asq_last_status)); - /* if FDIR VSI was set up, start it now */ for (i = 0; i < pf->num_alloc_vsi; i++) { if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) { @@ -15122,6 +15100,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) { dev_info(&pdev->dev, "setup of misc vector failed: %d\n", err); + i40e_cloud_filter_exit(pf); + i40e_fdir_teardown(pf); goto err_vsis; } } diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 4aca637d4a23..903d4e8cb0a1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1793,7 +1793,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring, skb_record_rx_queue(skb, rx_ring->queue_index); if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) { - u16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1; + __le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1; __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), le16_to_cpu(vlan_tag)); @@ -3113,13 +3113,16 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags, l4_proto = ip.v4->protocol; } else if (*tx_flags & I40E_TX_FLAGS_IPV6) { + int ret; + tunnel |= I40E_TX_CTX_EXT_IP_IPV6; exthdr = ip.hdr + sizeof(*ip.v6); l4_proto = ip.v6->nexthdr; - if (l4.hdr != exthdr) - ipv6_skip_exthdr(skb, exthdr - skb->data, - &l4_proto, &frag_off); + ret = ipv6_skip_exthdr(skb, exthdr - skb->data, + &l4_proto, &frag_off); + if (ret < 0) + return -1; } /* define outer transport */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 492ce213208d..37a21fb99922 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -444,7 +444,7 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring) struct i40e_tx_desc *tx_desc; tx_desc = I40E_TX_DESC(xdp_ring, ntu); - tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); + tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT); } /** diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 0a867d64d467..dc5b3c06d1e0 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -1776,7 +1776,8 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) goto err_alloc; } - if (iavf_process_config(adapter)) + err = iavf_process_config(adapter); + if (err) goto err_alloc; adapter->current_op = VIRTCHNL_OP_UNKNOWN; diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index fa1e128c24ec..619d93f8b54c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -443,9 +443,7 @@ struct ice_pf { struct ice_hw_port_stats stats_prev; struct ice_hw hw; u8 stat_prev_loaded:1; /* has previous stats been loaded */ -#ifdef CONFIG_DCB u16 dcbx_cap; -#endif /* CONFIG_DCB */ u32 tx_timeout_count; unsigned long tx_timeout_last_recovery; u32 tx_timeout_recovery_level; diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 3124a3bf519a..952e41a1e001 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -418,6 +418,8 @@ int ice_setup_rx_ctx(struct ice_ring *ring) writel(0, ring->tail); if (ring->xsk_pool) { + bool ok; + if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) { dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n", num_bufs, ring->q_index); @@ -426,8 +428,8 @@ int ice_setup_rx_ctx(struct ice_ring *ring) return 0; } - err = ice_alloc_rx_bufs_zc(ring, num_bufs); - if (err) + ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + if (!ok) dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n", ring->q_index, pf_q); return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 87f91b750d59..8c133a8be6ad 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -136,7 +136,7 @@ ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num) if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) return -EINVAL; - *num = IEEE_8021QAZ_MAX_TCS; + *num = pf->hw.func_caps.common_cap.maxtc; return 0; } @@ -160,6 +160,10 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) { struct ice_pf *pf = ice_netdev_to_pf(netdev); + /* if FW LLDP agent is running, DCBNL not allowed to change mode */ + if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + return ICE_DCB_NO_HW_CHG; + /* No support for LLD_MANAGED modes or CEE+IEEE */ if ((mode & DCB_CAP_DCBX_LLD_MANAGED) || ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) || diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 69c113a4de7e..aebebd2102da 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -8,6 +8,7 @@ #include "ice_fltr.h" #include "ice_lib.h" #include "ice_dcb_lib.h" +#include struct ice_stats { char stat_string[ETH_GSTRING_LEN]; @@ -1238,6 +1239,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) status = ice_init_pf_dcb(pf, true); if (status) dev_warn(dev, "Fail to init DCB\n"); + + pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED; + pf->dcbx_cap |= DCB_CAP_DCBX_HOST; } else { enum ice_status status; bool dcbx_agent_status; @@ -1280,6 +1284,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) if (status) dev_dbg(dev, "Fail to enable MIB change events\n"); + pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST; + pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED; + ice_nway_reset(netdev); } } @@ -3321,6 +3328,18 @@ ice_get_channels(struct net_device *dev, struct ethtool_channels *ch) ch->max_other = ch->other_count; } +/** + * ice_get_valid_rss_size - return valid number of RSS queues + * @hw: pointer to the HW structure + * @new_size: requested RSS queues + */ +static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size) +{ + struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; + + return min_t(int, new_size, BIT(caps->rss_table_entry_width)); +} + /** * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size * @vsi: VSI to reconfigure RSS LUT on @@ -3348,14 +3367,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size) return -ENOMEM; /* set RSS LUT parameters */ - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) vsi->rss_size = 1; - } else { - struct ice_hw_common_caps *caps = &hw->func_caps.common_cap; - - vsi->rss_size = min_t(int, req_rss_size, - BIT(caps->rss_table_entry_width)); - } + else + vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size); /* create/set RSS LUT */ ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size); @@ -3434,9 +3449,12 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch) ice_vsi_recfg_qs(vsi, new_rx, new_tx); - if (new_rx && !netif_is_rxfh_configured(dev)) + if (!netif_is_rxfh_configured(dev)) return ice_vsi_set_dflt_rss_lut(vsi, new_rx); + /* Update rss_size due to change in Rx queues */ + vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index ec7f6c64132e..b3161c5def46 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -1878,6 +1878,29 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) sizeof(struct virtchnl_version_info)); } +/** + * ice_vc_get_max_frame_size - get max frame size allowed for VF + * @vf: VF used to determine max frame size + * + * Max frame size is determined based on the current port's max frame size and + * whether a port VLAN is configured on this VF. The VF is not aware whether + * it's in a port VLAN so the PF needs to account for this in max frame size + * checks and sending the max frame size to the VF. + */ +static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) +{ + struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx]; + struct ice_port_info *pi = vsi->port_info; + u16 max_frame_size; + + max_frame_size = pi->phy.link_info.max_frame_size; + + if (vf->port_vlan_info) + max_frame_size -= VLAN_HLEN; + + return max_frame_size; +} + /** * ice_vc_get_vf_res_msg * @vf: pointer to the VF info @@ -1960,6 +1983,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg) vfres->max_vectors = pf->num_msix_per_vf; vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE; vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE; + vfres->max_mtu = ice_vc_get_max_frame_size(vf); vfres->vsi_res[0].vsi_id = vf->lan_vsi_num; vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV; @@ -2952,6 +2976,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) /* copy Rx queue info from VF into VSI */ if (qpi->rxq.ring_len > 0) { + u16 max_frame_size = ice_vc_get_max_frame_size(vf); + num_rxq++; vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr; vsi->rx_rings[i]->count = qpi->rxq.ring_len; @@ -2964,7 +2990,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi->rx_buf_len = qpi->rxq.databuffer_size; vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len; - if (qpi->rxq.max_pkt_size >= (16 * 1024) || + if (qpi->rxq.max_pkt_size > max_frame_size || qpi->rxq.max_pkt_size < 64) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; goto error_param; @@ -2972,6 +2998,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) } vsi->max_frame = qpi->rxq.max_pkt_size; + /* add space for the port VLAN since the VF driver is not + * expected to account for it in the MTU calculation + */ + if (vf->port_vlan_info) + vsi->max_frame += VLAN_HLEN; } /* VF can request to configure less than allocated queues or default diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 1782146db644..69ee1a8e87ab 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -408,18 +408,18 @@ int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid) * This function allocates a number of Rx buffers from the fill ring * or the internal recycle mechanism and places them on the Rx ring. * - * Returns false if all allocations were successful, true if any fail. + * Returns true if all allocations were successful, false if any fail. */ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; struct ice_rx_buf *rx_buf; - bool ret = false; + bool ok = true; dma_addr_t dma; if (!count) - return false; + return true; rx_desc = ICE_RX_DESC(rx_ring, ntu); rx_buf = &rx_ring->rx_buf[ntu]; @@ -427,7 +427,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) do { rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); if (!rx_buf->xdp) { - ret = true; + ok = false; break; } @@ -452,7 +452,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, ntu); } - return ret; + return ok; } /** diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index aaa954aae574..7bda8c5edea5 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -748,8 +748,8 @@ void igb_ptp_suspend(struct igb_adapter *adapter); void igb_ptp_rx_hang(struct igb_adapter *adapter); void igb_ptp_tx_hang(struct igb_adapter *adapter); void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb); -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb); +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb); int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); void igb_set_flag_queue_pairs(struct igb_adapter *, const u32); diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 03f78fdb0dcd..0e8c17f7af28 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -8232,7 +8232,8 @@ static inline bool igb_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -8243,7 +8244,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define IGB_LAST_OFFSET \ @@ -8319,9 +8320,10 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring, return NULL; if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb); - xdp->data += IGB_TS_HDR_LEN; - size -= IGB_TS_HDR_LEN; + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) { + xdp->data += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } } /* Determine available headroom for copy */ @@ -8382,8 +8384,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { - igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); - __skb_pull(skb, IGB_TS_HDR_LEN); + if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb)) + __skb_pull(skb, IGB_TS_HDR_LEN); } /* update buffer offset */ @@ -8632,11 +8634,17 @@ static unsigned int igb_rx_offset(struct igb_ring *rx_ring) } static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct igb_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -8652,9 +8660,9 @@ static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring, } static void igb_put_rx_buffer(struct igb_ring *rx_ring, - struct igb_rx_buffer *rx_buffer) + struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt) { - if (igb_can_reuse_rx_page(rx_buffer)) { + if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ igb_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -8681,6 +8689,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) u16 cleaned_count = igb_desc_unused(rx_ring); unsigned int xdp_xmit = 0; struct xdp_buff xdp; + int rx_buf_pgcnt; xdp.rxq = &rx_ring->xdp_rxq; @@ -8711,7 +8720,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) */ dma_rmb(); - rx_buffer = igb_get_rx_buffer(rx_ring, size); + rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -8754,7 +8763,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) break; } - igb_put_rx_buffer(rx_ring, rx_buffer); + igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt); cleaned_count++; /* fetch next buffer in frame if non-eop */ diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 7cc5428c3b3d..86a576201f5f 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -856,6 +856,9 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) dev_kfree_skb_any(skb); } +#define IGB_RET_PTP_DISABLED 1 +#define IGB_RET_PTP_INVALID 2 + /** * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp * @q_vector: Pointer to interrupt specific structure @@ -864,19 +867,29 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) * * This function is meant to retrieve a timestamp from the first buffer of an * incoming frame. The value is stored in little endian format starting on - * byte 8. + * byte 8 + * + * Returns: 0 if success, nonzero if failure **/ -void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, - struct sk_buff *skb) +int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, + struct sk_buff *skb) { - __le64 *regval = (__le64 *)va; struct igb_adapter *adapter = q_vector->adapter; + __le64 *regval = (__le64 *)va; int adjust = 0; + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return IGB_RET_PTP_DISABLED; + /* The timestamp is recorded in little endian format. * DWORD: 0 1 2 3 * Field: Reserved Reserved SYSTIML SYSTIMH */ + + /* check reserved dwords are zero, be/le doesn't matter for zero */ + if (regval[0]) + return IGB_RET_PTP_INVALID; + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), le64_to_cpu(regval[1])); @@ -896,6 +909,8 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, } skb_hwtstamps(skb)->hwtstamp = ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); + + return 0; } /** @@ -906,13 +921,15 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va, * This function is meant to retrieve a timestamp from the internal registers * of the adapter and store it in the skb. **/ -void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, - struct sk_buff *skb) +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb) { struct igb_adapter *adapter = q_vector->adapter; struct e1000_hw *hw = &adapter->hw; - u64 regval; int adjust = 0; + u64 regval; + + if (!(adapter->ptp_flags & IGB_PTP_ENABLED)) + return; /* If this bit is set, then the RX registers contain the time stamp. No * other packet will be time stamped until we read these registers, so diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 35baae900c1f..6dca67d9c25d 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -545,7 +545,7 @@ void igc_ptp_init(struct igc_adapter *adapter); void igc_ptp_reset(struct igc_adapter *adapter); void igc_ptp_suspend(struct igc_adapter *adapter); void igc_ptp_stop(struct igc_adapter *adapter); -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, struct sk_buff *skb); int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr); int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ec8cd69d4992..da259cd59add 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -1695,6 +1695,9 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Autoneg); } + /* Set pause flow control settings */ + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + switch (hw->fc.requested_mode) { case igc_fc_full: ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); @@ -1709,9 +1712,7 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev, Asym_Pause); break; default: - ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause); - ethtool_link_ksettings_add_link_mode(cmd, advertising, - Asym_Pause); + break; } status = pm_runtime_suspended(&adapter->pdev->dev) ? diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index afd6a62da29d..93874e930abf 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -3847,10 +3847,19 @@ static void igc_reset_task(struct work_struct *work) adapter = container_of(work, struct igc_adapter, reset_task); + rtnl_lock(); + /* If we're already down or resetting, just bail */ + if (test_bit(__IGC_DOWN, &adapter->state) || + test_bit(__IGC_RESETTING, &adapter->state)) { + rtnl_unlock(); + return; + } + igc_rings_dump(adapter); igc_regs_dump(adapter); netdev_err(adapter->netdev, "Reset adapter\n"); igc_reinit_locked(adapter); + rtnl_unlock(); } /** diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index ac0b9c85da7c..545f4d0e67cf 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -152,46 +152,54 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter, } /** - * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer * @q_vector: Pointer to interrupt specific structure * @va: Pointer to address containing Rx buffer * @skb: Buffer containing timestamp and packet * - * This function is meant to retrieve the first timestamp from the - * first buffer of an incoming frame. The value is stored in little - * endian format starting on byte 0. There's a second timestamp - * starting on byte 8. - **/ -void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va, + * This function retrieves the timestamp saved in the beginning of packet + * buffer. While two timestamps are available, one in timer0 reference and the + * other in timer1 reference, this function considers only the timestamp in + * timer0 reference. + */ +void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va, struct sk_buff *skb) { struct igc_adapter *adapter = q_vector->adapter; - __le64 *regval = (__le64 *)va; - int adjust = 0; - - /* The timestamp is recorded in little endian format. - * DWORD: | 0 | 1 | 2 | 3 - * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High + u64 regval; + int adjust; + + /* Timestamps are saved in little endian at the beginning of the packet + * buffer following the layout: + * + * DWORD: | 0 | 1 | 2 | 3 | + * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH | + * + * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds + * part of the timestamp. */ - igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), - le64_to_cpu(regval[0])); - - /* adjust timestamp for the RX latency based on link speed */ - if (adapter->hw.mac.type == igc_i225) { - switch (adapter->link_speed) { - case SPEED_10: - adjust = IGC_I225_RX_LATENCY_10; - break; - case SPEED_100: - adjust = IGC_I225_RX_LATENCY_100; - break; - case SPEED_1000: - adjust = IGC_I225_RX_LATENCY_1000; - break; - case SPEED_2500: - adjust = IGC_I225_RX_LATENCY_2500; - break; - } + regval = le32_to_cpu(va[2]); + regval |= (u64)le32_to_cpu(va[3]) << 32; + igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); + + /* Adjust timestamp for the RX latency based on link speed */ + switch (adapter->link_speed) { + case SPEED_10: + adjust = IGC_I225_RX_LATENCY_10; + break; + case SPEED_100: + adjust = IGC_I225_RX_LATENCY_100; + break; + case SPEED_1000: + adjust = IGC_I225_RX_LATENCY_1000; + break; + case SPEED_2500: + adjust = IGC_I225_RX_LATENCY_2500; + break; + default: + adjust = 0; + netdev_warn_once(adapter->netdev, "Imprecise timestamp\n"); + break; } skb_hwtstamps(skb)->hwtstamp = ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c index eca73526ac86..54d47265a7ac 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c @@ -575,6 +575,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->props.mode != XFRM_MODE_TRANSPORT) { + netdev_err(dev, "Unsupported mode for ipsec offload\n"); + return -EINVAL; + } + if (ixgbe_ipsec_check_mgmt_ip(xs)) { netdev_err(dev, "IPsec IP addr clash with mgmt filters\n"); return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 393d1c2cd853..e9c2d28efc81 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -9582,8 +9582,10 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter, ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask); err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter, input->sw_idx, queue); - if (!err) - ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + if (err) + goto err_out_w_lock; + + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); spin_unlock(&adapter->fdir_perfect_lock); if ((uhtid != 0x800) && (adapter->jump_tables[uhtid])) diff --git a/drivers/net/ethernet/intel/ixgbevf/ipsec.c b/drivers/net/ethernet/intel/ixgbevf/ipsec.c index 5170dd9d8705..caaea2c920a6 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ipsec.c +++ b/drivers/net/ethernet/intel/ixgbevf/ipsec.c @@ -272,6 +272,11 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs) return -EINVAL; } + if (xs->props.mode != XFRM_MODE_TRANSPORT) { + netdev_err(dev, "Unsupported mode for ipsec offload\n"); + return -EINVAL; + } + if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) { struct rx_sa rsa; diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index bc4d8d144401..fd5b33646ea7 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -3432,7 +3432,9 @@ static int mvneta_txq_sw_init(struct mvneta_port *pp, return -ENOMEM; /* Setup XPS mapping */ - if (txq_number > 1) + if (pp->neta_armada3700) + cpu = 0; + else if (txq_number > 1) cpu = txq->id % num_present_cpus(); else cpu = pp->rxq_def % num_present_cpus(); @@ -4210,6 +4212,11 @@ static int mvneta_cpu_online(unsigned int cpu, struct hlist_node *node) node_online); struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); + /* Armada 3700's per-cpu interrupt for mvneta is broken, all interrupts + * are routed to CPU 0, so we don't need all the cpu-hotplug support + */ + if (pp->neta_armada3700) + return 0; spin_lock(&pp->lock); /* diff --git a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h index b192692b4fc4..5c372d2c24a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h @@ -13499,8 +13499,6 @@ static struct npc_mcam_kex npc_mkex_default = { [NPC_LT_LC_IP] = { /* SIP+DIP: 8 bytes, KW2[63:0] */ KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10), - /* TOS: 1 byte, KW1[63:56] */ - KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf), }, /* Layer C: IPv6 */ [NPC_LT_LC_IP6] = { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index e8fd712860a1..e3fc6d1c0ec3 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2358,8 +2358,10 @@ static void rvu_unregister_interrupts(struct rvu *rvu) INTR_MASK(rvu->hw->total_pfs) & ~1ULL); for (irq = 0; irq < rvu->num_vec; irq++) { - if (rvu->irq_allocated[irq]) + if (rvu->irq_allocated[irq]) { free_irq(pci_irq_vector(rvu->pdev, irq), rvu); + rvu->irq_allocated[irq] = false; + } } pci_free_irq_vectors(rvu->pdev); @@ -2873,8 +2875,8 @@ static void rvu_remove(struct pci_dev *pdev) struct rvu *rvu = pci_get_drvdata(pdev); rvu_dbg_exit(rvu); - rvu_unregister_interrupts(rvu); rvu_unregister_dl(rvu); + rvu_unregister_interrupts(rvu); rvu_flr_wq_destroy(rvu); rvu_cgx_exit(rvu); rvu_fwdata_exit(rvu); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c index d27543c1a166..0488651a68d0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c @@ -150,12 +150,14 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { - int index, off = 0, flag = 0, go_back = 0, off_prev; + int index, off = 0, flag = 0, go_back = 0, len = 0; struct rvu *rvu = filp->private_data; int lf, pf, vf, pcifunc; struct rvu_block block; int bytes_not_copied; + int lf_str_size = 12; int buf_size = 2048; + char *lfs; char *buf; /* don't allow partial reads */ @@ -165,12 +167,20 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOSPC; - off += scnprintf(&buf[off], buf_size - 1 - off, "\npcifunc\t\t"); + + lfs = kzalloc(lf_str_size, GFP_KERNEL); + if (!lfs) { + kfree(buf); + return -ENOMEM; + } + off += scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size, + "pcifunc"); for (index = 0; index < BLK_COUNT; index++) - if (strlen(rvu->hw->block[index].name)) - off += scnprintf(&buf[off], buf_size - 1 - off, - "%*s\t", (index - 1) * 2, - rvu->hw->block[index].name); + if (strlen(rvu->hw->block[index].name)) { + off += scnprintf(&buf[off], buf_size - 1 - off, + "%-*s", lf_str_size, + rvu->hw->block[index].name); + } off += scnprintf(&buf[off], buf_size - 1 - off, "\n"); for (pf = 0; pf < rvu->hw->total_pfs; pf++) { for (vf = 0; vf <= rvu->hw->total_vfs; vf++) { @@ -179,14 +189,15 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, continue; if (vf) { + sprintf(lfs, "PF%d:VF%d", pf, vf - 1); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d:VF%d\t\t", pf, - vf - 1); + "%-*s", lf_str_size, lfs); } else { + sprintf(lfs, "PF%d", pf); go_back = scnprintf(&buf[off], buf_size - 1 - off, - "PF%d\t\t", pf); + "%-*s", lf_str_size, lfs); } off += go_back; @@ -194,20 +205,22 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, block = rvu->hw->block[index]; if (!strlen(block.name)) continue; - off_prev = off; + len = 0; + lfs[len] = '\0'; for (lf = 0; lf < block.lf.max; lf++) { if (block.fn_map[lf] != pcifunc) continue; flag = 1; - off += scnprintf(&buf[off], buf_size - 1 - - off, "%3d,", lf); + len += sprintf(&lfs[len], "%d,", lf); } - if (flag && off_prev != off) - off--; - else - go_back++; + + if (flag) + len--; + lfs[len] = '\0'; off += scnprintf(&buf[off], buf_size - 1 - off, - "\t"); + "%-*s", lf_str_size, lfs); + if (!strlen(lfs)) + go_back += lf_str_size; } if (!flag) off -= go_back; @@ -219,6 +232,7 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp, } bytes_not_copied = copy_to_user(buffer, buf, off); + kfree(lfs); kfree(buf); if (bytes_not_copied) @@ -385,7 +399,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp, u16 pcifunc; int ret, lf; - cmd_buf = memdup_user(buffer, count); + cmd_buf = memdup_user(buffer, count + 1); if (IS_ERR(cmd_buf)) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c index 5cf9b7a907ae..b81539f3b2ac 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c @@ -2490,10 +2490,10 @@ int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu, index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry); if (index >= mcam->bmap_entries) break; + entry = index + 1; if (mcam->entry2cntr_map[index] != req->cntr) continue; - entry = index + 1; npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr, index, req->cntr); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index 634d60655a74..07e841df5678 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -1625,6 +1625,7 @@ int otx2_stop(struct net_device *netdev) struct otx2_nic *pf = netdev_priv(netdev); struct otx2_cq_poll *cq_poll = NULL; struct otx2_qset *qset = &pf->qset; + struct otx2_rss_info *rss; int qidx, vec, wrk; netif_carrier_off(netdev); @@ -1637,6 +1638,10 @@ int otx2_stop(struct net_device *netdev) /* First stop packet Rx/Tx */ otx2_rxtx_enable(pf, false); + /* Clear RSS enable flag */ + rss = &pf->hw.rss_info; + rss->enable = false; + /* Cleanup Queue IRQ */ vec = pci_irq_vector(pf->pdev, pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START); diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c index a8641a407c06..96d2891f1675 100644 --- a/drivers/net/ethernet/mediatek/mtk_star_emac.c +++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c @@ -1225,8 +1225,6 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv) goto push_new_skb; } - desc_data.dma_addr = new_dma_addr; - /* We can't fail anymore at this point: it's safe to unmap the skb. */ mtk_star_dma_unmap_rx(priv, &desc_data); @@ -1236,6 +1234,9 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv) desc_data.skb->dev = ndev; netif_receive_skb(desc_data.skb); + /* update dma_addr for new skb */ + desc_data.dma_addr = new_dma_addr; + push_new_skb: desc_data.len = skb_tailroom(new_skb); desc_data.skb = new_skb; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 23849f2b9c25..1434df66fcf2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -47,7 +47,7 @@ #define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff) #define EN_ETHTOOL_WORD_MASK cpu_to_be32(0xffffffff) -static int mlx4_en_moderation_update(struct mlx4_en_priv *priv) +int mlx4_en_moderation_update(struct mlx4_en_priv *priv) { int i, t; int err = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 32aad4d32b88..c7504223a12a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -3558,6 +3558,8 @@ int mlx4_en_reset_config(struct net_device *dev, en_err(priv, "Failed starting port\n"); } + if (!err) + err = mlx4_en_moderation_update(priv); out: mutex_unlock(&mdev->state_lock); kfree(tmp); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index e8ed23190de0..f3d1a20201ef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -775,6 +775,7 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev); #define DEV_FEATURE_CHANGED(dev, new_features, feature) \ ((dev->features & feature) ^ (new_features & feature)) +int mlx4_en_moderation_update(struct mlx4_en_priv *priv); int mlx4_en_reset_config(struct net_device *dev, struct hwtstamp_config ts_config, netdev_features_t new_features); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 394f43add85c..a99e71bc7b3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -4986,6 +4986,7 @@ static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule if (!fs_rule->mirr_mbox) { mlx4_err(dev, "rule mirroring mailbox is null\n"); + mlx4_free_cmd_mailbox(dev, mailbox); return -EINVAL; } memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3261d0dc1104..41474e42a819 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -128,6 +128,11 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, { struct mlx5_core_dev *dev = devlink_priv(devlink); + if (mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode\n"); + return -EOPNOTSUPP; + } + switch (action) { case DEVLINK_RELOAD_ACTION_DRIVER_REINIT: mlx5_unload_one(dev, false); @@ -273,6 +278,10 @@ static int mlx5_devlink_enable_roce_validate(struct devlink *devlink, u32 id, NL_SET_ERR_MSG_MOD(extack, "Device doesn't support RoCE"); return -EOPNOTSUPP; } + if (mlx5_core_is_mp_slave(dev) || mlx5_lag_is_active(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi port slave/Lag device can't configure RoCE"); + return -EOPNOTSUPP; + } return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 055baf3b6cb1..f258f2f9b8cf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -90,14 +90,15 @@ struct page_pool; MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) -#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2) +#define MLX5_ALIGN_MTTS(mtts) (ALIGN(mtts, 8)) +#define MLX5_ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2) +#define MLX5_MTT_OCTW(mtts) (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts))) /* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between * WQEs, This page will absorb write overflow by the hardware, when * receiving packets larger than MTU. These oversize packets are * dropped by the driver at a later stage. */ -#define MLX5E_REQUIRED_WQE_MTTS (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8)) -#define MLX5E_LOG_ALIGNED_MPWQE_PPW (ilog2(MLX5E_REQUIRED_WQE_MTTS)) +#define MLX5E_REQUIRED_WQE_MTTS (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1)) #define MLX5E_REQUIRED_MTTS(wqes) (wqes * MLX5E_REQUIRED_WQE_MTTS) #define MLX5E_MAX_RQ_NUM_MTTS \ ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 6bc6b48a56dc..b42396df3111 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include "lib/fs_chains.h" @@ -51,11 +52,11 @@ struct mlx5_tc_ct_priv { struct mlx5_flow_table *ct_nat; struct mlx5_flow_table *post_ct; struct mutex control_lock; /* guards parallel adds/dels */ - struct mutex shared_counter_lock; struct mapping_ctx *zone_mapping; struct mapping_ctx *labels_mapping; enum mlx5_flow_namespace_type ns_type; struct mlx5_fs_chains *chains; + spinlock_t ht_lock; /* protects ft entries */ }; struct mlx5_ct_flow { @@ -124,6 +125,10 @@ struct mlx5_ct_counter { bool is_shared; }; +enum { + MLX5_CT_ENTRY_FLAG_VALID, +}; + struct mlx5_ct_entry { struct rhash_head node; struct rhash_head tuple_node; @@ -134,6 +139,12 @@ struct mlx5_ct_entry { struct mlx5_ct_tuple tuple; struct mlx5_ct_tuple tuple_nat; struct mlx5_ct_zone_rule zone_rules[2]; + + struct mlx5_tc_ct_priv *ct_priv; + struct work_struct work; + + refcount_t refcnt; + unsigned long flags; }; static const struct rhashtable_params cts_ht_params = { @@ -740,6 +751,87 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv, return err; } +static bool +mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry) +{ + return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); +} + +static struct mlx5_ct_entry * +mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple) +{ + struct mlx5_ct_entry *entry; + + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple, + tuples_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) { + return entry; + } else if (!entry) { + entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, + tuple, tuples_nat_ht_params); + if (entry && mlx5_tc_ct_entry_valid(entry) && + refcount_inc_not_zero(&entry->refcnt)) + return entry; + } + + return entry ? ERR_PTR(-EINVAL) : NULL; +} + +static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); + rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, + tuples_ht_params); +} + +static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry) +{ + struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv; + + mlx5_tc_ct_entry_del_rules(ct_priv, entry); + + spin_lock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_counter_put(ct_priv, entry); + kfree(entry); +} + +static void +mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + mlx5_tc_ct_entry_del(entry); +} + +static void mlx5_tc_ct_entry_del_work(struct work_struct *work) +{ + struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work); + + mlx5_tc_ct_entry_del(entry); +} + +static void +__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry) +{ + struct mlx5e_priv *priv; + + if (!refcount_dec_and_test(&entry->refcnt)) + return; + + priv = netdev_priv(entry->ct_priv->netdev); + INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work); + queue_work(priv->wq, &entry->work); +} + static struct mlx5_ct_counter * mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv) { @@ -792,16 +884,26 @@ mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv, } /* Use the same counter as the reverse direction */ - mutex_lock(&ct_priv->shared_counter_lock); - rev_entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &rev_tuple, - tuples_ht_params); - if (rev_entry) { - if (refcount_inc_not_zero(&rev_entry->counter->refcount)) { - mutex_unlock(&ct_priv->shared_counter_lock); - return rev_entry->counter; - } + spin_lock_bh(&ct_priv->ht_lock); + rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple); + + if (IS_ERR(rev_entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + goto create_counter; } - mutex_unlock(&ct_priv->shared_counter_lock); + + if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) { + ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry); + shared_counter = rev_entry->counter; + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(rev_entry); + return shared_counter; + } + + spin_unlock_bh(&ct_priv->ht_lock); + +create_counter: shared_counter = mlx5_tc_ct_counter_create(ct_priv); if (IS_ERR(shared_counter)) { @@ -866,10 +968,14 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (!meta_action) return -EOPNOTSUPP; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (entry) - return 0; + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (entry && refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + mlx5_tc_ct_entry_put(entry); + return -EEXIST; + } + spin_unlock_bh(&ct_priv->ht_lock); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -878,6 +984,8 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, entry->tuple.zone = ft->zone; entry->cookie = flow->cookie; entry->restore_cookie = meta_action->ct_metadata.cookie; + refcount_set(&entry->refcnt, 2); + entry->ct_priv = ct_priv; err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule); if (err) @@ -888,35 +996,40 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, if (err) goto err_set; - err = rhashtable_insert_fast(&ct_priv->ct_tuples_ht, - &entry->tuple_node, - tuples_ht_params); + spin_lock_bh(&ct_priv->ht_lock); + + err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node, + cts_ht_params); + if (err) + goto err_entries; + + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht, + &entry->tuple_node, + tuples_ht_params); if (err) goto err_tuple; if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) { - err = rhashtable_insert_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, - tuples_nat_ht_params); + err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht, + &entry->tuple_nat_node, + tuples_nat_ht_params); if (err) goto err_tuple_nat; } + spin_unlock_bh(&ct_priv->ht_lock); err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry, ft->zone_restore_id); if (err) goto err_rules; - err = rhashtable_insert_fast(&ft->ct_entries_ht, &entry->node, - cts_ht_params); - if (err) - goto err_insert; + set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags); + mlx5_tc_ct_entry_put(entry); /* this function reference */ return 0; -err_insert: - mlx5_tc_ct_entry_del_rules(ct_priv, entry); err_rules: + spin_lock_bh(&ct_priv->ht_lock); if (mlx5_tc_ct_entry_has_nat(entry)) rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, &entry->tuple_nat_node, tuples_nat_ht_params); @@ -925,47 +1038,43 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft, &entry->tuple_node, tuples_ht_params); err_tuple: + rhashtable_remove_fast(&ft->ct_entries_ht, + &entry->node, + cts_ht_params); +err_entries: + spin_unlock_bh(&ct_priv->ht_lock); err_set: kfree(entry); - netdev_warn(ct_priv->netdev, - "Failed to offload ct entry, err: %d\n", err); + if (err != -EEXIST) + netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err); return err; } -static void -mlx5_tc_ct_del_ft_entry(struct mlx5_tc_ct_priv *ct_priv, - struct mlx5_ct_entry *entry) -{ - mlx5_tc_ct_entry_del_rules(ct_priv, entry); - mutex_lock(&ct_priv->shared_counter_lock); - if (mlx5_tc_ct_entry_has_nat(entry)) - rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht, - &entry->tuple_nat_node, - tuples_nat_ht_params); - rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node, - tuples_ht_params); - mutex_unlock(&ct_priv->shared_counter_lock); - mlx5_tc_ct_counter_put(ct_priv, entry); - -} - static int mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft, struct flow_cls_offload *flow) { + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; unsigned long cookie = flow->cookie; struct mlx5_ct_entry *entry; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (!entry) + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); return -ENOENT; + } - mlx5_tc_ct_del_ft_entry(ft->ct_priv, entry); - WARN_ON(rhashtable_remove_fast(&ft->ct_entries_ht, - &entry->node, - cts_ht_params)); - kfree(entry); + if (!mlx5_tc_ct_entry_valid(entry)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params); + mlx5_tc_ct_entry_remove_from_tuples(entry); + spin_unlock_bh(&ct_priv->ht_lock); + + mlx5_tc_ct_entry_put(entry); return 0; } @@ -974,19 +1083,30 @@ static int mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft, struct flow_cls_offload *f) { + struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv; unsigned long cookie = f->cookie; struct mlx5_ct_entry *entry; u64 lastuse, packets, bytes; - entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, - cts_ht_params); - if (!entry) + spin_lock_bh(&ct_priv->ht_lock); + entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params); + if (!entry) { + spin_unlock_bh(&ct_priv->ht_lock); return -ENOENT; + } + + if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) { + spin_unlock_bh(&ct_priv->ht_lock); + return -EINVAL; + } + + spin_unlock_bh(&ct_priv->ht_lock); mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse); flow_stats_update(&f->stats, bytes, packets, 0, lastuse, FLOW_ACTION_HW_STATS_DELAYED); + mlx5_tc_ct_entry_put(entry); return 0; } @@ -1062,7 +1182,8 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec) mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG, &ctstate, &ctstate_mask); - if (ctstate_mask) + + if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT) return -EOPNOTSUPP; ctstate_mask |= MLX5_CT_STATE_TRK_BIT; @@ -1478,11 +1599,9 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone, static void mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg) { - struct mlx5_tc_ct_priv *ct_priv = arg; struct mlx5_ct_entry *entry = ptr; - mlx5_tc_ct_del_ft_entry(ct_priv, entry); - kfree(entry); + mlx5_tc_ct_entry_put(entry); } static void @@ -1960,6 +2079,7 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, goto err_mapping_labels; } + spin_lock_init(&ct_priv->ht_lock); ct_priv->ns_type = ns_type; ct_priv->chains = chains; ct_priv->netdev = priv->netdev; @@ -1994,7 +2114,6 @@ mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains, idr_init(&ct_priv->fte_ids); mutex_init(&ct_priv->control_lock); - mutex_init(&ct_priv->shared_counter_lock); rhashtable_init(&ct_priv->zone_ht, &zone_params); rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params); rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params); @@ -2037,7 +2156,6 @@ mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv) rhashtable_destroy(&ct_priv->ct_tuples_nat_ht); rhashtable_destroy(&ct_priv->zone_ht); mutex_destroy(&ct_priv->control_lock); - mutex_destroy(&ct_priv->shared_counter_lock); idr_destroy(&ct_priv->fte_ids); kfree(ct_priv); } @@ -2059,14 +2177,22 @@ mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv, if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone)) return false; - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, &tuple, - tuples_ht_params); - if (!entry) - entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht, - &tuple, tuples_nat_ht_params); - if (!entry) + spin_lock(&ct_priv->ht_lock); + + entry = mlx5_tc_ct_entry_get(ct_priv, &tuple); + if (!entry) { + spin_unlock(&ct_priv->ht_lock); + return false; + } + + if (IS_ERR(entry)) { + spin_unlock(&ct_priv->ht_lock); return false; + } + spin_unlock(&ct_priv->ht_lock); tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie); + __mlx5_tc_ct_entry_put(entry); + return true; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c index e472ed0eacfb..7ed3f9f79f11 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c @@ -227,6 +227,10 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv, option_key = (struct geneve_opt *)&enc_opts.key->data[0]; option_mask = (struct geneve_opt *)&enc_opts.mask->data[0]; + if (option_mask->opt_class == 0 && option_mask->type == 0 && + !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4)) + return 0; + if (option_key->length > max_tlv_option_data_len) { NL_SET_ERR_MSG_MOD(extack, "Matching on GENEVE options: unsupported option len"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index d487e5e37162..8d991c3b7a50 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -83,7 +83,7 @@ static inline void mlx5e_xdp_tx_disable(struct mlx5e_priv *priv) clear_bit(MLX5E_STATE_XDP_TX_ENABLED, &priv->state); /* Let other device's napi(s) and XSK wakeups see our new state. */ - synchronize_rcu(); + synchronize_net(); } static inline bool mlx5e_xdp_tx_is_enabled(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index d87c345878d3..f4bce1365639 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -111,7 +111,7 @@ int mlx5e_open_xsk(struct mlx5e_priv *priv, struct mlx5e_params *params, void mlx5e_close_xsk(struct mlx5e_channel *c) { clear_bit(MLX5E_CHANNEL_STATE_XSK, c->state); - synchronize_rcu(); /* Sync with the XSK wakeup and with NAPI. */ + synchronize_net(); /* Sync with the XSK wakeup and with NAPI. */ mlx5e_close_rq(&c->xskrq); mlx5e_close_cq(&c->xskrq.cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 1fae7fab8297..ff81b69a59a9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -173,7 +173,7 @@ static inline bool mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, #endif #if IS_ENABLED(CONFIG_GENEVE) - if (skb->encapsulation) + if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL) mlx5e_tx_tunnel_accel(skb, eseg, ihs); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 6a1d82503ef8..d06532d0baa4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -57,6 +57,20 @@ struct mlx5e_ktls_offload_context_rx { struct mlx5e_ktls_rx_resync_ctx resync; }; +static bool mlx5e_ktls_priv_rx_put(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + if (!refcount_dec_and_test(&priv_rx->resync.refcnt)) + return false; + + kfree(priv_rx); + return true; +} + +static void mlx5e_ktls_priv_rx_get(struct mlx5e_ktls_offload_context_rx *priv_rx) +{ + refcount_inc(&priv_rx->resync.refcnt); +} + static int mlx5e_ktls_create_tir(struct mlx5_core_dev *mdev, u32 *tirn, u32 rqtn) { int err, inlen; @@ -326,7 +340,7 @@ static void resync_handle_work(struct work_struct *work) priv_rx = container_of(resync, struct mlx5e_ktls_offload_context_rx, resync); if (unlikely(test_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags))) { - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); return; } @@ -334,7 +348,7 @@ static void resync_handle_work(struct work_struct *work) sq = &c->async_icosq; if (resync_post_get_progress_params(sq, priv_rx)) - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); } static void resync_init(struct mlx5e_ktls_rx_resync_ctx *resync, @@ -377,7 +391,11 @@ static int resync_handle_seq_match(struct mlx5e_ktls_offload_context_rx *priv_rx return err; } -/* Function is called with elevated refcount, it decreases it. */ +/* Function can be called with the refcount being either elevated or not. + * It decreases the refcount and may free the kTLS priv context. + * Refcount is not elevated only if tls_dev_del has been called, but GET_PSV was + * already in flight. + */ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, struct mlx5e_icosq *sq) { @@ -410,7 +428,7 @@ void mlx5e_ktls_handle_get_psv_completion(struct mlx5e_icosq_wqe_info *wi, tls_offload_rx_resync_async_request_end(priv_rx->sk, cpu_to_be32(hw_seq)); priv_rx->stats->tls_resync_req_end++; out: - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); dma_unmap_single(dev, buf->dma_addr, PROGRESS_PARAMS_PADDED_SIZE, DMA_FROM_DEVICE); kfree(buf); } @@ -431,9 +449,9 @@ static bool resync_queue_get_psv(struct sock *sk) return false; resync = &priv_rx->resync; - refcount_inc(&resync->refcnt); + mlx5e_ktls_priv_rx_get(priv_rx); if (unlikely(!queue_work(resync->priv->tls->rx_wq, &resync->work))) - refcount_dec(&resync->refcnt); + mlx5e_ktls_priv_rx_put(priv_rx); return true; } @@ -625,31 +643,6 @@ int mlx5e_ktls_add_rx(struct net_device *netdev, struct sock *sk, return err; } -/* Elevated refcount on the resync object means there are - * outstanding operations (uncompleted GET_PSV WQEs) that - * will read the resync / priv_rx objects once completed. - * Wait for them to avoid use-after-free. - */ -static void wait_for_resync(struct net_device *netdev, - struct mlx5e_ktls_rx_resync_ctx *resync) -{ -#define MLX5E_KTLS_RX_RESYNC_TIMEOUT 20000 /* msecs */ - unsigned long exp_time = jiffies + msecs_to_jiffies(MLX5E_KTLS_RX_RESYNC_TIMEOUT); - unsigned int refcnt; - - do { - refcnt = refcount_read(&resync->refcnt); - if (refcnt == 1) - return; - - msleep(20); - } while (time_before(jiffies, exp_time)); - - netdev_warn(netdev, - "Failed waiting for kTLS RX resync refcnt to be released (%u).\n", - refcnt); -} - void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_rx *priv_rx; @@ -663,7 +656,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) priv_rx = mlx5e_get_ktls_rx_priv_ctx(tls_ctx); set_bit(MLX5E_PRIV_RX_FLAG_DELETING, priv_rx->flags); mlx5e_set_ktls_rx_priv_ctx(tls_ctx, NULL); - synchronize_rcu(); /* Sync with NAPI */ + synchronize_net(); /* Sync with NAPI */ if (!cancel_work_sync(&priv_rx->rule.work)) /* completion is needed, as the priv_rx in the add flow * is maintained on the wqe info (wi), not on the socket. @@ -671,8 +664,7 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) wait_for_completion(&priv_rx->add_ctx); resync = &priv_rx->resync; if (cancel_work_sync(&resync->work)) - refcount_dec(&resync->refcnt); - wait_for_resync(netdev, resync); + mlx5e_ktls_priv_rx_put(priv_rx); priv_rx->stats->tls_del++; if (priv_rx->rule.rule) @@ -680,5 +672,9 @@ void mlx5e_ktls_del_rx(struct net_device *netdev, struct tls_context *tls_ctx) mlx5_core_destroy_tir(mdev, priv_rx->tirn); mlx5_ktls_destroy_key(mdev, priv_rx->key_id); - kfree(priv_rx); + /* priv_rx should normally be freed here, but if there is an outstanding + * GET_PSV, deallocation will be delayed until the CQE for GET_PSV is + * processed. + */ + mlx5e_ktls_priv_rx_put(priv_rx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 302001d6661e..c9d01e705ab2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -525,7 +525,7 @@ static int mlx5e_get_coalesce(struct net_device *netdev, #define MLX5E_MAX_COAL_FRAMES MLX5_MAX_CQ_COUNT static void -mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +mlx5e_set_priv_channels_tx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) { struct mlx5_core_dev *mdev = priv->mdev; int tc; @@ -540,6 +540,17 @@ mlx5e_set_priv_channels_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesc coal->tx_coalesce_usecs, coal->tx_max_coalesced_frames); } + } +} + +static void +mlx5e_set_priv_channels_rx_coalesce(struct mlx5e_priv *priv, struct ethtool_coalesce *coal) +{ + struct mlx5_core_dev *mdev = priv->mdev; + int i; + + for (i = 0; i < priv->channels.num; ++i) { + struct mlx5e_channel *c = priv->channels.c[i]; mlx5_core_modify_cq_moderation(mdev, &c->rq.cq.mcq, coal->rx_coalesce_usecs, @@ -586,21 +597,9 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, tx_moder->pkts = coal->tx_max_coalesced_frames; new_channels.params.tx_dim_enabled = !!coal->use_adaptive_tx_coalesce; - if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { - priv->channels.params = new_channels.params; - goto out; - } - /* we are opened */ - reset_rx = !!coal->use_adaptive_rx_coalesce != priv->channels.params.rx_dim_enabled; reset_tx = !!coal->use_adaptive_tx_coalesce != priv->channels.params.tx_dim_enabled; - if (!reset_rx && !reset_tx) { - mlx5e_set_priv_channels_coalesce(priv, coal); - priv->channels.params = new_channels.params; - goto out; - } - if (reset_rx) { u8 mode = MLX5E_GET_PFLAG(&new_channels.params, MLX5E_PFLAG_RX_CQE_BASED_MODER); @@ -614,6 +613,20 @@ int mlx5e_ethtool_set_coalesce(struct mlx5e_priv *priv, mlx5e_reset_tx_moderation(&new_channels.params, mode); } + if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + priv->channels.params = new_channels.params; + goto out; + } + + if (!reset_rx && !reset_tx) { + if (!coal->use_adaptive_rx_coalesce) + mlx5e_set_priv_channels_rx_coalesce(priv, coal); + if (!coal->use_adaptive_tx_coalesce) + mlx5e_set_priv_channels_tx_coalesce(priv, coal); + priv->channels.params = new_channels.params; + goto out; + } + err = mlx5e_safe_switch_channels(priv, &new_channels, NULL, NULL); out: @@ -1863,6 +1876,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; + int err; if (!MLX5_CAP_GEN(mdev, cqe_compression)) return -EOPNOTSUPP; @@ -1872,7 +1886,10 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev, return -EINVAL; } - mlx5e_modify_rx_cqe_compression_locked(priv, enable); + err = mlx5e_modify_rx_cqe_compression_locked(priv, enable); + if (err) + return err; + priv->channels.params.rx_cqe_compress_def = enable; return 0; @@ -1980,8 +1997,13 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable) */ if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { + struct mlx5e_params old_params; + + old_params = priv->channels.params; priv->channels.params = new_channels.params; err = mlx5e_num_channels_changed(priv); + if (err) + priv->channels.params = old_params; goto out; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 3fc7d18ac868..aaa5a56b44c7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -65,6 +65,7 @@ #include "en/devlink.h" #include "lib/mlx5.h" #include "en/ptp.h" +#include "fpga/ipsec.h" bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) { @@ -106,7 +107,7 @@ bool mlx5e_striding_rq_possible(struct mlx5_core_dev *mdev, if (!mlx5e_check_fragmented_striding_rq_cap(mdev)) return false; - if (MLX5_IPSEC_DEV(mdev)) + if (mlx5_fpga_is_ipsec_device(mdev)) return false; if (params->xdp_prog) { @@ -304,9 +305,9 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq rq->wqe_overflow.addr); } -static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix) +static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix) { - return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT; + return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT; } static void mlx5e_init_frags_partition(struct mlx5e_rq *rq) @@ -546,7 +547,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c, mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i); u32 byte_count = rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz; - u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i); + u64 dma_offset = mlx5e_get_mpwqe_offset(i); wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom); wqe->data[0].byte_count = cpu_to_be32(byte_count); @@ -914,7 +915,7 @@ void mlx5e_activate_rq(struct mlx5e_rq *rq) void mlx5e_deactivate_rq(struct mlx5e_rq *rq) { clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state); - synchronize_rcu(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ + synchronize_net(); /* Sync with NAPI to prevent mlx5e_post_rx_wqes. */ } void mlx5e_close_rq(struct mlx5e_rq *rq) @@ -1348,7 +1349,7 @@ void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq) struct mlx5_wq_cyc *wq = &sq->wq; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - synchronize_rcu(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ + synchronize_net(); /* Sync with NAPI to prevent netif_tx_wake_queue. */ mlx5e_tx_disable_queue(sq->txq); @@ -1423,7 +1424,7 @@ void mlx5e_activate_icosq(struct mlx5e_icosq *icosq) void mlx5e_deactivate_icosq(struct mlx5e_icosq *icosq) { clear_bit(MLX5E_SQ_STATE_ENABLED, &icosq->state); - synchronize_rcu(); /* Sync with NAPI. */ + synchronize_net(); /* Sync with NAPI. */ } void mlx5e_close_icosq(struct mlx5e_icosq *sq) @@ -1502,7 +1503,7 @@ void mlx5e_close_xdpsq(struct mlx5e_xdpsq *sq) struct mlx5e_channel *c = sq->channel; clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); - synchronize_rcu(); /* Sync with NAPI. */ + synchronize_net(); /* Sync with NAPI. */ mlx5e_destroy_sq(c->mdev, sq->sqn); mlx5e_free_xdpsq_descs(sq); @@ -1826,12 +1827,12 @@ static int mlx5e_open_queues(struct mlx5e_channel *c, mlx5e_build_create_cq_param(&ccp, c); - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, &c->async_icosq.cq); if (err) return err; - err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->async_icosq.cqp, &ccp, + err = mlx5e_open_cq(c->priv, icocq_moder, &cparam->icosq.cqp, &ccp, &c->icosq.cq); if (err) goto err_close_async_icosq_cq; @@ -2069,7 +2070,7 @@ static void mlx5e_build_rq_frags_info(struct mlx5_core_dev *mdev, int i; #ifdef CONFIG_MLX5_EN_IPSEC - if (MLX5_IPSEC_DEV(mdev)) + if (mlx5_fpga_is_ipsec_device(mdev)) byte_count += MLX5E_METADATA_ETHER_LEN; #endif @@ -2442,8 +2443,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs) { int i; - if (chs->port_ptp) + if (chs->port_ptp) { mlx5e_port_ptp_close(chs->port_ptp); + chs->port_ptp = NULL; + } for (i = 0; i < chs->num; i++) mlx5e_close_channel(chs->c[i]); @@ -3700,10 +3703,17 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) } if (mlx5e_is_uplink_rep(priv)) { + struct mlx5e_vport_stats *vstats = &priv->stats.vport; + stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok); stats->rx_bytes = PPORT_802_3_GET(pstats, a_octets_received_ok); stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok); stats->tx_bytes = PPORT_802_3_GET(pstats, a_octets_transmitted_ok); + + /* vport multicast also counts packets that are dropped due to steering + * or rx out of buffer + */ + stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets); } else { mlx5e_fold_sw_stats64(priv, stats); } @@ -4455,8 +4465,9 @@ static int mlx5e_xdp_allowed(struct mlx5e_priv *priv, struct bpf_prog *prog) return -EINVAL; } - if (MLX5_IPSEC_DEV(priv->mdev)) { - netdev_warn(netdev, "can't set XDP with IPSec offload\n"); + if (mlx5_fpga_is_ipsec_device(priv->mdev)) { + netdev_warn(netdev, + "XDP is not available on Innova cards with IPsec support\n"); return -EINVAL; } @@ -4546,8 +4557,10 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) struct mlx5e_channel *c = priv->channels.c[i]; mlx5e_rq_replace_xdp_prog(&c->rq, prog); - if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) + if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) { + bpf_prog_inc(prog); mlx5e_rq_replace_xdp_prog(&c->xskrq, prog); + } } unlock: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index ca4b55839a8a..b2e71a045df0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -505,7 +505,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) struct mlx5e_icosq *sq = rq->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *umr_wqe; - u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1); u16 pi; int err; int i; @@ -536,7 +535,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) umr_wqe->ctrl.opmod_idx_opcode = cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | MLX5_OPCODE_UMR); - umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset); + umr_wqe->uctrl.xlt_offset = + cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix))); sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) { .wqe_type = MLX5E_ICOSQ_WQE_UMR_RX, @@ -1795,8 +1795,8 @@ int mlx5e_rq_set_handlers(struct mlx5e_rq *rq, struct mlx5e_params *params, bool rq->handle_rx_cqe = priv->profile->rx_handlers->handle_rx_cqe_mpwqe; #ifdef CONFIG_MLX5_EN_IPSEC - if (MLX5_IPSEC_DEV(mdev)) { - netdev_err(netdev, "MPWQE RQ with IPSec offload not supported\n"); + if (mlx5_fpga_is_ipsec_device(mdev)) { + netdev_err(netdev, "MPWQE RQ with Innova IPSec offload not supported\n"); return -EINVAL; } #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index dd0bfbacad47..24fa399b1577 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2595,6 +2595,16 @@ static int __parse_cls_flower(struct mlx5e_priv *priv, *match_level = MLX5_MATCH_L4; } + /* Currenlty supported only for MPLS over UDP */ + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) && + !netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "Matching on MPLS is supported only for MPLS over UDP"); + netdev_err(priv->netdev, + "Matching on MPLS is supported only for MPLS over UDP\n"); + return -EOPNOTSUPP; + } + return 0; } @@ -3198,6 +3208,37 @@ static int is_action_keys_supported(const struct flow_action_entry *act, return 0; } +static bool modify_tuple_supported(bool modify_tuple, bool ct_clear, + bool ct_flow, struct netlink_ext_ack *extack, + struct mlx5e_priv *priv, + struct mlx5_flow_spec *spec) +{ + if (!modify_tuple || ct_clear) + return true; + + if (ct_flow) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with non-clear ct()"); + netdev_info(priv->netdev, + "can't offload tuple modification with non-clear ct()"); + return false; + } + + /* Add ct_state=-trk match so it will be offloaded for non ct flows + * (or after clear action), as otherwise, since the tuple is changed, + * we can't restore ct state + */ + if (mlx5_tc_ct_add_no_trk_match(spec)) { + NL_SET_ERR_MSG_MOD(extack, + "can't offload tuple modification with ct matches and no ct(clear) action"); + netdev_info(priv->netdev, + "can't offload tuple modification with ct matches and no ct(clear) action"); + return false; + } + + return true; +} + static bool modify_header_match_supported(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, struct flow_action *flow_action, @@ -3236,18 +3277,9 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv, return err; } - /* Add ct_state=-trk match so it will be offloaded for non ct flows - * (or after clear action), as otherwise, since the tuple is changed, - * we can't restore ct state - */ - if (!ct_clear && modify_tuple && - mlx5_tc_ct_add_no_trk_match(spec)) { - NL_SET_ERR_MSG_MOD(extack, - "can't offload tuple modify header with ct matches"); - netdev_info(priv->netdev, - "can't offload tuple modify header with ct matches"); + if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack, + priv, spec)) return false; - } ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol); if (modify_ip_header && ip_proto != IPPROTO_TCP && @@ -5040,7 +5072,8 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate, */ if (rate) { rate = (rate * BITS_PER_BYTE) + 500000; - rate_mbps = max_t(u32, do_div(rate, 1000000), 1); + do_div(rate, 1000000); + rate_mbps = max_t(u32, rate, 1); } err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c index cc67366495b0..22bee4990232 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c @@ -124,7 +124,7 @@ struct mlx5_fpga_ipsec { struct ida halloc; }; -static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) +bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { if (!mdev->fpga || !MLX5_CAP_GEN(mdev, fpga)) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h index db88eb4c49e3..8931b5584477 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h @@ -43,6 +43,7 @@ u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev); const struct mlx5_flow_cmds * mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type); void mlx5_fpga_ipsec_build_fs_cmds(void); +bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev); #else static inline const struct mlx5_accel_ipsec_ops *mlx5_fpga_ipsec_ops(struct mlx5_core_dev *mdev) @@ -55,6 +56,7 @@ mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type) } static inline void mlx5_fpga_ipsec_build_fs_cmds(void) {}; +static inline bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev) { return false; } #endif /* CONFIG_MLX5_FPGA_IPSEC */ #endif /* __MLX5_FPGA_IPSEC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 54523bed16cd..0c32c485eb58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -190,6 +190,16 @@ static bool reset_fw_if_needed(struct mlx5_core_dev *dev) return true; } +static void enter_error_state(struct mlx5_core_dev *dev, bool force) +{ + if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; + mlx5_cmd_flush(dev); + } + + mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); +} + void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) { bool err_detected = false; @@ -208,12 +218,7 @@ void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force) goto unlock; } - if (mlx5_health_check_fatal_sensors(dev) || force) { /* protected state setting */ - dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; - mlx5_cmd_flush(dev); - } - - mlx5_notifier_call_chain(dev->priv.events, MLX5_DEV_EVENT_SYS_ERROR, (void *)1); + enter_error_state(dev, force); unlock: mutex_unlock(&dev->intf_state_mutex); } @@ -613,7 +618,7 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) priv = container_of(health, struct mlx5_priv, health); dev = container_of(priv, struct mlx5_core_dev, priv); - mlx5_enter_error_state(dev, false); + enter_error_state(dev, false); if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { if (mlx5_health_try_recover(dev)) mlx5_core_err(dev, "health recovery failed\n"); @@ -707,8 +712,9 @@ static void poll_health(struct timer_list *t) mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error); dev->priv.health.fatal_error = fatal_error; print_health_info(dev); + dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR; mlx5_trigger_health_work(dev); - goto out; + return; } count = ioread32be(health->health_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ca6f2fc39ea0..ba1a4ae28097 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1396,7 +1396,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev_err(&pdev->dev, "mlx5_crdump_enable failed with error code %d\n", err); pci_save_state(pdev); - devlink_reload_enable(devlink); + if (!mlx5_core_is_mp_slave(dev)) + devlink_reload_enable(devlink); return 0; err_load_one: diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 16e2df6ef2f4..c4adc7f740d3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4430,6 +4430,7 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32); #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 BIT(22) +#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4 BIT(23) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR BIT(27) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR BIT(28) #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR BIT(29) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index 540616469e28..68333ecf6151 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1171,6 +1171,11 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = { .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, .speed = SPEED_100000, }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, + }, }; #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 41424ee909a0..23d9fe18adba 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -5861,6 +5861,10 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp->router->aborted) return 0; + if (fen_info->fi->nh && + !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id)) + return 0; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id, &fen_info->dst, sizeof(fen_info->dst), fen_info->dst_len, @@ -6511,6 +6515,9 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp, if (mlxsw_sp_fib6_rt_should_ignore(rt)) return 0; + if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id)) + return 0; + fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id, &rt->fib6_dst.addr, sizeof(rt->fib6_dst.addr), diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 40e2e79d4517..131b2a53d261 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -613,7 +613,8 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | + MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, .speed = 100000, }, }; diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c index 729495a1a77e..365550335292 100644 --- a/drivers/net/ethernet/mscc/ocelot_flower.c +++ b/drivers/net/ethernet/mscc/ocelot_flower.c @@ -540,13 +540,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress, return -EOPNOTSUPP; } + flow_rule_match_ipv4_addrs(rule, &match); + if (filter->block_id == VCAP_IS1 && *(u32 *)&match.mask->dst) { NL_SET_ERR_MSG_MOD(extack, "Key type S1_NORMAL cannot match on destination IP"); return -EOPNOTSUPP; } - flow_rule_match_ipv4_addrs(rule, &match); tmp = &filter->key.ipv4.sip.value.addr[0]; memcpy(tmp, &match.key->src, 4); diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 5defd31d481c..aa06fcb38f8b 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -327,8 +327,14 @@ int nfp_compile_flow_metadata(struct nfp_app *app, goto err_free_ctx_entry; } + /* Do net allocate a mask-id for pre_tun_rules. These flows are used to + * configure the pre_tun table and are never actually send to the + * firmware as an add-flow message. This causes the mask-id allocation + * on the firmware to get out of sync if allocated here. + */ new_mask_id = 0; - if (!nfp_check_mask_add(app, nfp_flow->mask_data, + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_add(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, &nfp_flow->meta.flags, &new_mask_id)) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id"); @@ -359,7 +365,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app, goto err_remove_mask; } - if (!nfp_check_mask_remove(app, nfp_flow->mask_data, + if (!nfp_flow->pre_tun_rule.dev && + !nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, NULL, &new_mask_id)) { NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id"); @@ -374,8 +381,10 @@ int nfp_compile_flow_metadata(struct nfp_app *app, return 0; err_remove_mask: - nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len, - NULL, &new_mask_id); + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, + NULL, &new_mask_id); err_remove_rhash: WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table, &ctx_entry->ht_node, @@ -406,9 +415,10 @@ int nfp_modify_flow_metadata(struct nfp_app *app, __nfp_modify_flow_metadata(priv, nfp_flow); - nfp_check_mask_remove(app, nfp_flow->mask_data, - nfp_flow->meta.mask_len, &nfp_flow->meta.flags, - &new_mask_id); + if (!nfp_flow->pre_tun_rule.dev) + nfp_check_mask_remove(app, nfp_flow->mask_data, + nfp_flow->meta.mask_len, &nfp_flow->meta.flags, + &new_mask_id); /* Update flow payload with mask ids. */ nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id; diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 1c59aff2163c..d72225d64a75 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1142,6 +1142,12 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (!(key_layer & NFP_FLOWER_LAYER_IPV4) && + !(key_layer & NFP_FLOWER_LAYER_IPV6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on ipv4/ipv6 eth_type must be present"); + return -EOPNOTSUPP; + } + /* Skip fields known to exist. */ mask += sizeof(struct nfp_flower_meta_tci); ext += sizeof(struct nfp_flower_meta_tci); @@ -1152,6 +1158,13 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, mask += sizeof(struct nfp_flower_in_port); ext += sizeof(struct nfp_flower_in_port); + /* Ensure destination MAC address matches pre_tun_dev. */ + mac = (struct nfp_flower_mac_mpls *)ext; + if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC must match output dev MAC"); + return -EOPNOTSUPP; + } + /* Ensure destination MAC address is fully matched. */ mac = (struct nfp_flower_mac_mpls *)mask; if (!is_broadcast_ether_addr(&mac->mac_dst[0])) { @@ -1159,6 +1172,11 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app, return -EOPNOTSUPP; } + if (mac->mpls_lse) { + NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported"); + return -EOPNOTSUPP; + } + mask += sizeof(struct nfp_flower_mac_mpls); ext += sizeof(struct nfp_flower_mac_mpls); if (key_layer & NFP_FLOWER_LAYER_IPV4 || diff --git a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c index 7248d248f604..d19c02e99114 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c +++ b/drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c @@ -16,8 +16,9 @@ #define NFP_FL_MAX_ROUTES 32 #define NFP_TUN_PRE_TUN_RULE_LIMIT 32 -#define NFP_TUN_PRE_TUN_RULE_DEL 0x1 -#define NFP_TUN_PRE_TUN_IDX_BIT 0x8 +#define NFP_TUN_PRE_TUN_RULE_DEL BIT(0) +#define NFP_TUN_PRE_TUN_IDX_BIT BIT(3) +#define NFP_TUN_PRE_TUN_IPV6_BIT BIT(7) /** * struct nfp_tun_pre_run_rule - rule matched before decap @@ -1268,6 +1269,7 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, { struct nfp_flower_priv *app_priv = app->priv; struct nfp_tun_offloaded_mac *mac_entry; + struct nfp_flower_meta_tci *key_meta; struct nfp_tun_pre_tun_rule payload; struct net_device *internal_dev; int err; @@ -1290,6 +1292,15 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app, if (!mac_entry) return -ENOENT; + /* Set/clear IPV6 bit. cpu_to_be16() swap will lead to MSB being + * set/clear for port_idx. + */ + key_meta = (struct nfp_flower_meta_tci *)flow->unmasked_data; + if (key_meta->nfp_flow_key_layer & NFP_FLOWER_LAYER_IPV6) + mac_entry->index |= NFP_TUN_PRE_TUN_IPV6_BIT; + else + mac_entry->index &= ~NFP_TUN_PRE_TUN_IPV6_BIT; + payload.port_idx = cpu_to_be16(mac_entry->index); /* Copy mac id and vlan to flow - dev may not exist at delete time. */ diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c index ac4cd5d82e69..b7601cadcb8c 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c @@ -1079,15 +1079,17 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) { int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems; struct ionic_tx_stats *stats = q_to_tx_stats(q); + int ndescs; int err; - /* If TSO, need roundup(skb->len/mss) descs */ + /* Each desc is mss long max, so a descriptor for each gso_seg */ if (skb_is_gso(skb)) - return (skb->len / skb_shinfo(skb)->gso_size) + 1; + ndescs = skb_shinfo(skb)->gso_segs; + else + ndescs = 1; - /* If non-TSO, just need 1 desc and nr_frags sg elems */ if (skb_shinfo(skb)->nr_frags <= sg_elems) - return 1; + return ndescs; /* Too many frags, so linearize */ err = skb_linearize(skb); @@ -1096,8 +1098,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb) stats->linearize++; - /* Need 1 desc and zero sg elems */ - return 1; + return ndescs; } static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c index 7760a3394e93..7ecb3dfe30bd 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c @@ -1425,6 +1425,7 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter) if (fw_dump->tmpl_hdr == NULL || current_version > prev_version) { vfree(fw_dump->tmpl_hdr); + fw_dump->tmpl_hdr = NULL; if (qlcnic_83xx_md_check_extended_dump_capability(adapter)) extended = !qlcnic_83xx_extend_md_capab(adapter); @@ -1443,6 +1444,8 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter) struct qlcnic_83xx_dump_template_hdr *hdr; hdr = fw_dump->tmpl_hdr; + if (!hdr) + return; hdr->drv_cap_mask = 0x1f; fw_dump->cap_mask = 0x1f; dev_info(&pdev->dev, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 0d78408b4e26..7c1a057dcf3d 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1013,7 +1013,7 @@ static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int typ { /* based on RTL8168FP_OOBMAC_BASE in vendor driver */ if (tp->mac_version == RTL_GIGA_MAC_VER_52 && type == ERIAR_OOB) - *cmd |= 0x7f0 << 18; + *cmd |= 0xf70 << 18; } DECLARE_RTL_COND(rtl_eriar_cond) @@ -2208,6 +2208,7 @@ static void rtl_pll_power_down(struct rtl8169_private *tp) switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: @@ -2235,6 +2236,7 @@ static void rtl_pll_power_up(struct rtl8169_private *tp) { switch (tp->mac_version) { case RTL_GIGA_MAC_VER_25 ... RTL_GIGA_MAC_VER_26: + case RTL_GIGA_MAC_VER_29 ... RTL_GIGA_MAC_VER_30: case RTL_GIGA_MAC_VER_32 ... RTL_GIGA_MAC_VER_33: case RTL_GIGA_MAC_VER_37: case RTL_GIGA_MAC_VER_39: @@ -2315,14 +2317,14 @@ static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp) static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, 0x3f); + RTL_W8(tp, MaxTxPacketSize, 0x24); RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01); } static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp) { - RTL_W8(tp, MaxTxPacketSize, 0x0c); + RTL_W8(tp, MaxTxPacketSize, 0x3f); RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0); RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01); } @@ -4675,6 +4677,9 @@ static void rtl8169_down(struct rtl8169_private *tp) rtl8169_update_counters(tp); + pci_clear_master(tp->pci_dev); + rtl_pci_commit(tp); + rtl8169_cleanup(tp, true); rtl_pll_power_down(tp); @@ -4682,6 +4687,7 @@ static void rtl8169_down(struct rtl8169_private *tp) static void rtl8169_up(struct rtl8169_private *tp) { + pci_set_master(tp->pci_dev); rtl_pll_power_up(tp); rtl8169_init_phy(tp); napi_enable(&tp->napi); @@ -5346,8 +5352,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_hw_reset(tp); - pci_set_master(pdev); - rc = rtl_alloc_irq(tp); if (rc < 0) { dev_err(&pdev->dev, "Can't allocate interrupt\n"); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 590b088bc4c7..f029c7c03804 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -560,6 +560,8 @@ static struct sh_eth_cpu_data r7s72100_data = { EESR_TDE, .fdr_value = 0x0000070f, + .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .no_psr = 1, .apr = 1, .mpr = 1, @@ -780,6 +782,8 @@ static struct sh_eth_cpu_data r7s9210_data = { .fdr_value = 0x0000070f, + .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5, + .apr = 1, .mpr = 1, .tpauser = 1, @@ -1089,6 +1093,9 @@ static struct sh_eth_cpu_data sh771x_data = { EESIPR_CEEFIP | EESIPR_CELFIP | EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP | EESIPR_PREIP | EESIPR_CERFIP, + + .trscer_err_mask = DESC_I_RINT8, + .tsu = 1, .dual_port = 1, }; diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 19d20a6d0d44..3e172fc64976 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -1718,14 +1718,17 @@ static int netsec_netdev_init(struct net_device *ndev) goto err1; /* set phy power down */ - data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR) | - BMCR_PDOWN; - netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data); + data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR); + netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, + data | BMCR_PDOWN); ret = netsec_reset_hardware(priv, true); if (ret) goto err2; + /* Restore phy power state */ + netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data); + spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock); spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index 103d2448e9e0..a9087dae767d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -233,6 +233,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat) static int intel_mgbe_common_data(struct pci_dev *pdev, struct plat_stmmacenet_data *plat) { + char clk_name[20]; int ret; int i; @@ -300,8 +301,10 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->eee_usecs_rate = plat->clk_ptp_rate; /* Set system clock */ + sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev)); + plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev, - "stmmac-clk", NULL, 0, + clk_name, NULL, 0, plat->clk_ptp_rate); if (IS_ERR(plat->stmmac_clk)) { diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index f184b00f5116..5f500141567d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -301,7 +301,7 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac) return -EINVAL; } - if (rx_dly_config & PRG_ETH0_ADJ_ENABLE) { + if (delay_config & PRG_ETH0_ADJ_ENABLE) { if (!dwmac->timing_adj_clk) { dev_err(dwmac->dev, "The timing-adjustment clock is mandatory for the RX delay re-timing\n"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index a5e0eff4a387..9f5ccf1a0a54 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1217,6 +1217,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) plat_dat->init = sun8i_dwmac_init; plat_dat->exit = sun8i_dwmac_exit; plat_dat->setup = sun8i_dwmac_setup; + plat_dat->tx_fifo_size = 4096; + plat_dat->rx_fifo_size = 16384; ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index c6540b003b43..cbf4429fb1d2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -402,19 +402,53 @@ static void dwmac4_rd_set_tx_ic(struct dma_desc *p) p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION); } -static void dwmac4_display_ring(void *head, unsigned int size, bool rx) +static void dwmac4_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { - struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; int i; pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); - for (i = 0; i < size; i++) { - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), - le32_to_cpu(p->des0), le32_to_cpu(p->des1), - le32_to_cpu(p->des2), le32_to_cpu(p->des3)); - p++; + if (desc_size == sizeof(struct dma_desc)) { + struct dma_desc *p = (struct dma_desc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*p); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(p->des0), le32_to_cpu(p->des1), + le32_to_cpu(p->des2), le32_to_cpu(p->des3)); + p++; + } + } else if (desc_size == sizeof(struct dma_extended_desc)) { + struct dma_extended_desc *extp = (struct dma_extended_desc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*extp); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(extp->basic.des0), le32_to_cpu(extp->basic.des1), + le32_to_cpu(extp->basic.des2), le32_to_cpu(extp->basic.des3), + le32_to_cpu(extp->des4), le32_to_cpu(extp->des5), + le32_to_cpu(extp->des6), le32_to_cpu(extp->des7)); + extp++; + } + } else if (desc_size == sizeof(struct dma_edesc)) { + struct dma_edesc *ep = (struct dma_edesc *)head; + + for (i = 0; i < size; i++) { + dma_addr = dma_rx_phy + i * sizeof(*ep); + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, + le32_to_cpu(ep->des4), le32_to_cpu(ep->des5), + le32_to_cpu(ep->des6), le32_to_cpu(ep->des7), + le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1), + le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3)); + ep++; + } + } else { + pr_err("unsupported descriptor!"); } } @@ -499,10 +533,15 @@ static void dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len) *len = le32_to_cpu(p->des2) & RDES2_HL; } -static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr) +static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool buf2_valid) { p->des2 = cpu_to_le32(lower_32_bits(addr)); - p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR); + p->des3 = cpu_to_le32(upper_32_bits(addr)); + + if (buf2_valid) + p->des3 |= cpu_to_le32(RDES3_BUFFER2_VALID_ADDR); + else + p->des3 &= cpu_to_le32(~RDES3_BUFFER2_VALID_ADDR); } static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index bb29bfcd62c3..62aa0e95beb7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -124,6 +124,23 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, ioaddr + DMA_CHAN_INTR_ENA(chan)); } +static void dwmac410_dma_init_channel(void __iomem *ioaddr, + struct stmmac_dma_cfg *dma_cfg, u32 chan) +{ + u32 value; + + /* common channel control register config */ + value = readl(ioaddr + DMA_CHAN_CONTROL(chan)); + if (dma_cfg->pblx8) + value = value | DMA_BUS_MODE_PBL; + + writel(value, ioaddr + DMA_CHAN_CONTROL(chan)); + + /* Mask interrupts by writing to CSR7 */ + writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10, + ioaddr + DMA_CHAN_INTR_ENA(chan)); +} + static void dwmac4_dma_init(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg, int atds) { @@ -523,7 +540,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = { const struct stmmac_dma_ops dwmac410_dma_ops = { .reset = dwmac4_dma_reset, .init = dwmac4_dma_init, - .init_chan = dwmac4_dma_init_channel, + .init_chan = dwmac410_dma_init_channel, .init_rx_chan = dwmac4_dma_init_rx_chan, .init_tx_chan = dwmac4_dma_init_tx_chan, .axi = dwmac4_dma_axi, diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 0b4ee2dbb691..71e50751ef2d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -53,10 +53,6 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan) value &= ~DMA_CONTROL_ST; writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan)); - - value = readl(ioaddr + GMAC_CONFIG); - value &= ~GMAC_CONFIG_TE; - writel(value, ioaddr + GMAC_CONFIG); } void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c index 0aaf19ab5672..ccfb0102dde4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c @@ -292,7 +292,7 @@ static void dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len) *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL; } -static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr) +static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool is_valid) { p->des2 = cpu_to_le32(lower_32_bits(addr)); p->des3 = cpu_to_le32(upper_32_bits(addr)); diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c index d02cec296f51..6650edfab5bc 100644 --- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c @@ -417,19 +417,22 @@ static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc, } } -static void enh_desc_display_ring(void *head, unsigned int size, bool rx) +static void enh_desc_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { struct dma_extended_desc *ep = (struct dma_extended_desc *)head; + dma_addr_t dma_addr; int i; pr_info("Extended %s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { u64 x; + dma_addr = dma_rx_phy + i * sizeof(*ep); x = *(u64 *)ep; - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(ep), + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, (unsigned int)x, (unsigned int)(x >> 32), ep->basic.des2, ep->basic.des3); ep++; diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.h b/drivers/net/ethernet/stmicro/stmmac/hwif.h index b40b2e0667bb..979ac9fca23c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.h +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.h @@ -78,7 +78,8 @@ struct stmmac_desc_ops { /* get rx timestamp status */ int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats); /* Display ring */ - void (*display_ring)(void *head, unsigned int size, bool rx); + void (*display_ring)(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size); /* set MSS via context descriptor */ void (*set_mss)(struct dma_desc *p, unsigned int mss); /* get descriptor skbuff address */ @@ -91,7 +92,7 @@ struct stmmac_desc_ops { int (*get_rx_hash)(struct dma_desc *p, u32 *hash, enum pkt_hash_types *type); void (*get_rx_header_len)(struct dma_desc *p, unsigned int *len); - void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr); + void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr, bool buf2_valid); void (*set_sarc)(struct dma_desc *p, u32 sarc_type); void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag, u32 inner_type); diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c index f083360e4ba6..98ef43f35802 100644 --- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c +++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c @@ -269,19 +269,22 @@ static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats) return 1; } -static void ndesc_display_ring(void *head, unsigned int size, bool rx) +static void ndesc_display_ring(void *head, unsigned int size, bool rx, + dma_addr_t dma_rx_phy, unsigned int desc_size) { struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; int i; pr_info("%s descriptor ring:\n", rx ? "RX" : "TX"); for (i = 0; i < size; i++) { u64 x; + dma_addr = dma_rx_phy + i * sizeof(*p); x = *(u64 *)p; - pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x", - i, (unsigned int)virt_to_phys(p), + pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x", + i, &dma_addr, (unsigned int)x, (unsigned int)(x >> 32), p->des2, p->des3); p++; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 26b971cd4da5..4749bd0af160 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1133,6 +1133,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv) static void stmmac_display_rx_rings(struct stmmac_priv *priv) { u32 rx_cnt = priv->plat->rx_queues_to_use; + unsigned int desc_size; void *head_rx; u32 queue; @@ -1142,19 +1143,24 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv) pr_info("\tRX Queue %u rings\n", queue); - if (priv->extend_desc) + if (priv->extend_desc) { head_rx = (void *)rx_q->dma_erx; - else + desc_size = sizeof(struct dma_extended_desc); + } else { head_rx = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } /* Display RX ring */ - stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true); + stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); } } static void stmmac_display_tx_rings(struct stmmac_priv *priv) { u32 tx_cnt = priv->plat->tx_queues_to_use; + unsigned int desc_size; void *head_tx; u32 queue; @@ -1164,14 +1170,19 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv) pr_info("\tTX Queue %d rings\n", queue); - if (priv->extend_desc) + if (priv->extend_desc) { head_tx = (void *)tx_q->dma_etx; - else if (tx_q->tbs & STMMAC_TBS_AVAIL) + desc_size = sizeof(struct dma_extended_desc); + } else if (tx_q->tbs & STMMAC_TBS_AVAIL) { head_tx = (void *)tx_q->dma_entx; - else + desc_size = sizeof(struct dma_edesc); + } else { head_tx = (void *)tx_q->dma_tx; + desc_size = sizeof(struct dma_desc); + } - stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false); + stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false, + tx_q->dma_tx_phy, desc_size); } } @@ -1303,9 +1314,10 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p, return -ENOMEM; buf->sec_addr = page_pool_get_dma_addr(buf->sec_page); - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr); + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); } else { buf->sec_page = NULL; + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); } buf->addr = page_pool_get_dma_addr(buf->page); @@ -3648,7 +3660,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue) DMA_FROM_DEVICE); stmmac_set_desc_addr(priv, p, buf->addr); - stmmac_set_desc_sec_addr(priv, p, buf->sec_addr); + if (priv->sph) + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true); + else + stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false); stmmac_refill_desc3(priv, rx_q, p); rx_q->rx_count_frames++; @@ -3736,18 +3751,23 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue) unsigned int count = 0, error = 0, len = 0; int status = 0, coe = priv->hw->rx_csum; unsigned int next_entry = rx_q->cur_rx; + unsigned int desc_size; struct sk_buff *skb = NULL; if (netif_msg_rx_status(priv)) { void *rx_head; netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__); - if (priv->extend_desc) + if (priv->extend_desc) { rx_head = (void *)rx_q->dma_erx; - else + desc_size = sizeof(struct dma_extended_desc); + } else { rx_head = (void *)rx_q->dma_rx; + desc_size = sizeof(struct dma_desc); + } - stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true); + stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true, + rx_q->dma_rx_phy, desc_size); } while (count < limit) { unsigned int buf1_len = 0, buf2_len = 0; @@ -4315,24 +4335,27 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr) static struct dentry *stmmac_fs_dir; static void sysfs_display_ring(void *head, int size, int extend_desc, - struct seq_file *seq) + struct seq_file *seq, dma_addr_t dma_phy_addr) { int i; struct dma_extended_desc *ep = (struct dma_extended_desc *)head; struct dma_desc *p = (struct dma_desc *)head; + dma_addr_t dma_addr; for (i = 0; i < size; i++) { if (extend_desc) { - seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(ep), + dma_addr = dma_phy_addr + i * sizeof(*ep); + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1), le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3)); ep++; } else { - seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n", - i, (unsigned int)virt_to_phys(p), + dma_addr = dma_phy_addr + i * sizeof(*p); + seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n", + i, &dma_addr, le32_to_cpu(p->des0), le32_to_cpu(p->des1), le32_to_cpu(p->des2), le32_to_cpu(p->des3)); p++; @@ -4360,11 +4383,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_erx, - priv->dma_rx_size, 1, seq); + priv->dma_rx_size, 1, seq, rx_q->dma_rx_phy); } else { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)rx_q->dma_rx, - priv->dma_rx_size, 0, seq); + priv->dma_rx_size, 0, seq, rx_q->dma_rx_phy); } } @@ -4376,11 +4399,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v) if (priv->extend_desc) { seq_printf(seq, "Extended descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_etx, - priv->dma_tx_size, 1, seq); + priv->dma_tx_size, 1, seq, tx_q->dma_tx_phy); } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) { seq_printf(seq, "Descriptor ring:\n"); sysfs_display_ring((void *)tx_q->dma_tx, - priv->dma_tx_size, 0, seq); + priv->dma_tx_size, 0, seq, tx_q->dma_tx_phy); } } @@ -5144,13 +5167,16 @@ int stmmac_dvr_remove(struct device *dev) netdev_info(priv->dev, "%s: removing driver", __func__); stmmac_stop_all_dma(priv); + stmmac_mac_set(priv, priv->ioaddr, false); + netif_carrier_off(ndev); + unregister_netdev(ndev); + /* Serdes power down needs to happen after VLAN filter + * is deleted that is triggered by unregister_netdev(). + */ if (priv->plat->serdes_powerdown) priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); - stmmac_mac_set(priv, priv->ioaddr, false); - netif_carrier_off(ndev); - unregister_netdev(ndev); #ifdef CONFIG_DEBUG_FS stmmac_exit_fs(ndev); #endif @@ -5257,6 +5283,8 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv) tx_q->cur_tx = 0; tx_q->dirty_tx = 0; tx_q->mss = 0; + + netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue)); } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 56985542e202..44bb133c3000 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -316,6 +316,32 @@ static int tc_setup_cbs(struct stmmac_priv *priv, if (!priv->dma_cap.av) return -EOPNOTSUPP; + /* Port Transmit Rate and Speed Divider */ + switch (priv->speed) { + case SPEED_10000: + ptr = 32; + speed_div = 10000000; + break; + case SPEED_5000: + ptr = 32; + speed_div = 5000000; + break; + case SPEED_2500: + ptr = 8; + speed_div = 2500000; + break; + case SPEED_1000: + ptr = 8; + speed_div = 1000000; + break; + case SPEED_100: + ptr = 4; + speed_div = 100000; + break; + default: + return -EOPNOTSUPP; + } + mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use; if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) { ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB); @@ -332,10 +358,6 @@ static int tc_setup_cbs(struct stmmac_priv *priv, priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB; } - /* Port Transmit Rate and Speed Divider */ - ptr = (priv->speed == SPEED_100) ? 4 : 8; - speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000; - /* Final adjustments for HW */ value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div); priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 68695d4afacd..707ccdd03b19 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3931,8 +3931,6 @@ static void niu_xmac_interrupt(struct niu *np) mp->rx_mcasts += RXMAC_MC_FRM_CNT_COUNT; if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP) mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT; - if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP) - mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT; if (val & XRXMAC_STATUS_RXHIST1_CNT_EXP) mp->rx_hist_cnt1 += RXMAC_HIST_CNT1_COUNT; if (val & XRXMAC_STATUS_RXHIST2_CNT_EXP) diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index b8f4f419173f..d054c6e83b1c 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -2044,6 +2044,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) /*bdx_hw_reset(priv); */ if (bdx_read_mac(priv)) { pr_err("load MAC address failed\n"); + err = -EFAULT; goto err_out_iomap; } SET_NETDEV_DEV(ndev, &pdev->dev); diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 6fea980acf64..4cd701a9277d 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1817,13 +1817,25 @@ static int axienet_probe(struct platform_device *pdev) lp->options = XAE_OPTION_DEFAULTS; lp->rx_bd_num = RX_BD_NUM_DEFAULT; lp->tx_bd_num = TX_BD_NUM_DEFAULT; + + lp->clk = devm_clk_get_optional(&pdev->dev, NULL); + if (IS_ERR(lp->clk)) { + ret = PTR_ERR(lp->clk); + goto free_netdev; + } + ret = clk_prepare_enable(lp->clk); + if (ret) { + dev_err(&pdev->dev, "Unable to enable clock: %d\n", ret); + goto free_netdev; + } + /* Map device registers */ ethres = platform_get_resource(pdev, IORESOURCE_MEM, 0); lp->regs = devm_ioremap_resource(&pdev->dev, ethres); if (IS_ERR(lp->regs)) { dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n"); ret = PTR_ERR(lp->regs); - goto free_netdev; + goto cleanup_clk; } lp->regs_start = ethres->start; @@ -1898,12 +1910,12 @@ static int axienet_probe(struct platform_device *pdev) break; default: ret = -EINVAL; - goto free_netdev; + goto cleanup_clk; } } else { ret = of_get_phy_mode(pdev->dev.of_node, &lp->phy_mode); if (ret) - goto free_netdev; + goto cleanup_clk; } /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */ @@ -1916,7 +1928,7 @@ static int axienet_probe(struct platform_device *pdev) dev_err(&pdev->dev, "unable to get DMA resource\n"); of_node_put(np); - goto free_netdev; + goto cleanup_clk; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); @@ -1936,12 +1948,12 @@ static int axienet_probe(struct platform_device *pdev) if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); ret = PTR_ERR(lp->dma_regs); - goto free_netdev; + goto cleanup_clk; } if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) { dev_err(&pdev->dev, "could not determine irqs\n"); ret = -ENOMEM; - goto free_netdev; + goto cleanup_clk; } /* Autodetect the need for 64-bit DMA pointers. @@ -1971,7 +1983,7 @@ static int axienet_probe(struct platform_device *pdev) ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width)); if (ret) { dev_err(&pdev->dev, "No suitable DMA available\n"); - goto free_netdev; + goto cleanup_clk; } /* Check for Ethernet core IRQ (optional) */ @@ -1992,20 +2004,6 @@ static int axienet_probe(struct platform_device *pdev) lp->phy_node = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (lp->phy_node) { - lp->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(lp->clk)) { - dev_warn(&pdev->dev, "Failed to get clock: %ld\n", - PTR_ERR(lp->clk)); - lp->clk = NULL; - } else { - ret = clk_prepare_enable(lp->clk); - if (ret) { - dev_err(&pdev->dev, "Unable to enable clock: %d\n", - ret); - goto free_netdev; - } - } - ret = axienet_mdio_setup(lp); if (ret) dev_warn(&pdev->dev, @@ -2016,12 +2014,12 @@ static int axienet_probe(struct platform_device *pdev) if (!lp->phy_node) { dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n"); ret = -EINVAL; - goto free_netdev; + goto cleanup_mdio; } lp->pcs_phy = of_mdio_find_device(lp->phy_node); if (!lp->pcs_phy) { ret = -EPROBE_DEFER; - goto free_netdev; + goto cleanup_mdio; } lp->phylink_config.pcs_poll = true; } @@ -2035,17 +2033,30 @@ static int axienet_probe(struct platform_device *pdev) if (IS_ERR(lp->phylink)) { ret = PTR_ERR(lp->phylink); dev_err(&pdev->dev, "phylink_create error (%i)\n", ret); - goto free_netdev; + goto cleanup_mdio; } ret = register_netdev(lp->ndev); if (ret) { dev_err(lp->dev, "register_netdev() error (%i)\n", ret); - goto free_netdev; + goto cleanup_phylink; } return 0; +cleanup_phylink: + phylink_destroy(lp->phylink); + +cleanup_mdio: + if (lp->pcs_phy) + put_device(&lp->pcs_phy->dev); + if (lp->mii_bus) + axienet_mdio_teardown(lp); + of_node_put(lp->phy_node); + +cleanup_clk: + clk_disable_unprepare(lp->clk); + free_netdev: free_netdev(ndev); diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 4c04e271f184..fd3c2d86e48b 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -539,7 +539,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { netdev_dbg(dev, "packet too big, fragmentation needed\n"); - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); goto err_rt; diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index 84bb8ae92725..eb1c8396bcdd 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -581,10 +581,10 @@ ipa_resource_config(struct ipa *ipa, const struct ipa_resource_data *data) return -EINVAL; for (i = 0; i < data->resource_src_count; i++) - ipa_resource_config_src(ipa, data->resource_src); + ipa_resource_config_src(ipa, &data->resource_src[i]); for (i = 0; i < data->resource_dst_count; i++) - ipa_resource_config_dst(ipa, data->resource_dst); + ipa_resource_config_dst(ipa, &data->resource_dst[i]); return 0; } diff --git a/drivers/net/ipa/ipa_qmi.c b/drivers/net/ipa/ipa_qmi.c index 2fc64483f275..e594bf3b600f 100644 --- a/drivers/net/ipa/ipa_qmi.c +++ b/drivers/net/ipa/ipa_qmi.c @@ -249,6 +249,7 @@ static const struct qmi_msg_handler ipa_server_msg_handlers[] = { .decoded_size = IPA_QMI_DRIVER_INIT_COMPLETE_REQ_SZ, .fn = ipa_server_driver_init_complete, }, + { }, }; /* Handle an INIT_DRIVER response message from the modem. */ @@ -269,6 +270,7 @@ static const struct qmi_msg_handler ipa_client_msg_handlers[] = { .decoded_size = IPA_QMI_INIT_DRIVER_RSP_SZ, .fn = ipa_client_init_driver, }, + { }, }; /* Return a pointer to an init modem driver request structure, which contains diff --git a/drivers/net/ipa/ipa_reg.h b/drivers/net/ipa/ipa_reg.h index e6b0827a244e..732e691e9aa6 100644 --- a/drivers/net/ipa/ipa_reg.h +++ b/drivers/net/ipa/ipa_reg.h @@ -408,15 +408,18 @@ enum ipa_cs_offload_en { static inline u32 ipa_header_size_encoded(enum ipa_version version, u32 header_size) { + u32 size = header_size & field_mask(HDR_LEN_FMASK); u32 val; - val = u32_encode_bits(header_size, HDR_LEN_FMASK); - if (version < IPA_VERSION_4_5) + val = u32_encode_bits(size, HDR_LEN_FMASK); + if (version < IPA_VERSION_4_5) { + /* ipa_assert(header_size == size); */ return val; + } /* IPA v4.5 adds a few more most-significant bits */ - header_size >>= hweight32(HDR_LEN_FMASK); - val |= u32_encode_bits(header_size, HDR_LEN_MSB_FMASK); + size = header_size >> hweight32(HDR_LEN_FMASK); + val |= u32_encode_bits(size, HDR_LEN_MSB_FMASK); return val; } @@ -425,15 +428,18 @@ static inline u32 ipa_header_size_encoded(enum ipa_version version, static inline u32 ipa_metadata_offset_encoded(enum ipa_version version, u32 offset) { + u32 off = offset & field_mask(HDR_OFST_METADATA_FMASK); u32 val; - val = u32_encode_bits(offset, HDR_OFST_METADATA_FMASK); - if (version < IPA_VERSION_4_5) + val = u32_encode_bits(off, HDR_OFST_METADATA_FMASK); + if (version < IPA_VERSION_4_5) { + /* ipa_assert(offset == off); */ return val; + } /* IPA v4.5 adds a few more most-significant bits */ - offset >>= hweight32(HDR_OFST_METADATA_FMASK); - val |= u32_encode_bits(offset, HDR_OFST_METADATA_MSB_FMASK); + off = offset >> hweight32(HDR_OFST_METADATA_FMASK); + val |= u32_encode_bits(off, HDR_OFST_METADATA_MSB_FMASK); return val; } diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7178468302c8..ad6dbf011052 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -296,6 +296,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port) dev_net_set(dev, nsim_dev_net(nsim_dev)); ns = netdev_priv(dev); ns->netdev = dev; + u64_stats_init(&ns->syncp); ns->nsim_dev = nsim_dev; ns->nsim_dev_port = nsim_dev_port; ns->nsim_bus_dev = nsim_dev->nsim_bus_dev; diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 8a4ec3222168..07a98c324942 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -26,7 +26,46 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); -static int bcm54xx_config_clock_delay(struct phy_device *phydev); +static int bcm54xx_config_clock_delay(struct phy_device *phydev) +{ + int rc, val; + + /* handling PHY's internal RX clock delay */ + val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + val |= MII_BCM54XX_AUXCTL_MISC_WREN; + if (phydev->interface == PHY_INTERFACE_MODE_RGMII || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + /* Disable RGMII RXC-RXD skew */ + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + } + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + /* Enable RGMII RXC-RXD skew */ + val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; + } + rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, + val); + if (rc < 0) + return rc; + + /* handling PHY's internal TX clock delay */ + val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); + if (phydev->interface == PHY_INTERFACE_MODE_RGMII || + phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { + /* Disable internal TX clock delay */ + val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; + } + if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || + phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { + /* Enable internal TX clock delay */ + val |= BCM54810_SHD_CLK_CTL_GTXCLK_EN; + } + rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + if (rc < 0) + return rc; + + return 0; +} static int bcm54210e_config_init(struct phy_device *phydev) { @@ -64,45 +103,62 @@ static int bcm54612e_config_init(struct phy_device *phydev) return 0; } -static int bcm54xx_config_clock_delay(struct phy_device *phydev) +static int bcm54616s_config_init(struct phy_device *phydev) { int rc, val; - /* handling PHY's internal RX clock delay */ + if (phydev->interface != PHY_INTERFACE_MODE_SGMII && + phydev->interface != PHY_INTERFACE_MODE_1000BASEX) + return 0; + + /* Ensure proper interface mode is selected. */ + /* Disable RGMII mode */ val = bcm54xx_auxctl_read(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC); + if (val < 0) + return val; + val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN; val |= MII_BCM54XX_AUXCTL_MISC_WREN; - if (phydev->interface == PHY_INTERFACE_MODE_RGMII || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* Disable RGMII RXC-RXD skew */ - val &= ~MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - } - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* Enable RGMII RXC-RXD skew */ - val |= MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN; - } rc = bcm54xx_auxctl_write(phydev, MII_BCM54XX_AUXCTL_SHDWSEL_MISC, val); if (rc < 0) return rc; - /* handling PHY's internal TX clock delay */ - val = bcm_phy_read_shadow(phydev, BCM54810_SHD_CLK_CTL); - if (phydev->interface == PHY_INTERFACE_MODE_RGMII || - phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) { - /* Disable internal TX clock delay */ - val &= ~BCM54810_SHD_CLK_CTL_GTXCLK_EN; - } - if (phydev->interface == PHY_INTERFACE_MODE_RGMII_ID || - phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) { - /* Enable internal TX clock delay */ - val |= BCM54810_SHD_CLK_CTL_GTXCLK_EN; - } - rc = bcm_phy_write_shadow(phydev, BCM54810_SHD_CLK_CTL, val); + /* Select 1000BASE-X register set (primary SerDes) */ + val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE); + if (val < 0) + return val; + val |= BCM54XX_SHD_MODE_1000BX; + rc = bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE, val); if (rc < 0) return rc; - return 0; + /* Power down SerDes interface */ + rc = phy_set_bits(phydev, MII_BMCR, BMCR_PDOWN); + if (rc < 0) + return rc; + + /* Select proper interface mode */ + val &= ~BCM54XX_SHD_INTF_SEL_MASK; + val |= phydev->interface == PHY_INTERFACE_MODE_SGMII ? + BCM54XX_SHD_INTF_SEL_SGMII : + BCM54XX_SHD_INTF_SEL_GBIC; + rc = bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE, val); + if (rc < 0) + return rc; + + /* Power up SerDes interface */ + rc = phy_clear_bits(phydev, MII_BMCR, BMCR_PDOWN); + if (rc < 0) + return rc; + + /* Select copper register set */ + val &= ~BCM54XX_SHD_MODE_1000BX; + rc = bcm_phy_write_shadow(phydev, BCM54XX_SHD_MODE, val); + if (rc < 0) + return rc; + + /* Power up copper interface */ + return phy_clear_bits(phydev, MII_BMCR, BMCR_PDOWN); } /* Needs SMDSP clock enabled via bcm54xx_phydsp_config() */ @@ -283,15 +339,21 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_adjust_rxrefclk(phydev); - if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54210E) { + switch (BRCM_PHY_MODEL(phydev)) { + case PHY_ID_BCM50610: + case PHY_ID_BCM50610M: + err = bcm54xx_config_clock_delay(phydev); + break; + case PHY_ID_BCM54210E: err = bcm54210e_config_init(phydev); - if (err) - return err; - } else if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54612E) { + break; + case PHY_ID_BCM54612E: err = bcm54612e_config_init(phydev); - if (err) - return err; - } else if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810) { + break; + case PHY_ID_BCM54616S: + err = bcm54616s_config_init(phydev); + break; + case PHY_ID_BCM54810: /* For BCM54810, we need to disable BroadR-Reach function */ val = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL); @@ -299,9 +361,10 @@ static int bcm54xx_config_init(struct phy_device *phydev) err = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL, val); - if (err < 0) - return err; + break; } + if (err) + return err; bcm54xx_phydsp_config(phydev); @@ -332,6 +395,11 @@ static int bcm54xx_resume(struct phy_device *phydev) if (ret < 0) return ret; + /* Upon exiting power down, the PHY remains in an internal reset state + * for 40us + */ + fsleep(40); + return bcm54xx_config_init(phydev); } @@ -475,7 +543,7 @@ static int bcm5481_config_aneg(struct phy_device *phydev) static int bcm54616s_probe(struct phy_device *phydev) { - int val, intf_sel; + int val; val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_MODE); if (val < 0) @@ -487,8 +555,7 @@ static int bcm54616s_probe(struct phy_device *phydev) * RGMII-1000Base-X is properly supported, but RGMII-100Base-FX * support is still missing as of now. */ - intf_sel = (val & BCM54XX_SHD_INTF_SEL_MASK) >> 1; - if (intf_sel == 1) { + if ((val & BCM54XX_SHD_INTF_SEL_MASK) == BCM54XX_SHD_INTF_SEL_RGMII) { val = bcm_phy_read_shadow(phydev, BCM54616S_SHD_100FX_CTRL); if (val < 0) return val; @@ -500,6 +567,8 @@ static int bcm54616s_probe(struct phy_device *phydev) */ if (!(val & BCM54616S_100FX_MODE)) phydev->dev_flags |= PHY_BCM_FLAGS_MODE_1000BX; + + phydev->port = PORT_FIBRE; } return 0; diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index fff371ca1086..f7a2ec150e54 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -290,6 +290,7 @@ static int dp83822_config_intr(struct phy_device *phydev) static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) { + bool trigger_machine = false; int irq_status; /* The MISR1 and MISR2 registers are holding the interrupt status in @@ -305,7 +306,7 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83822_MISR2); if (irq_status < 0) { @@ -313,11 +314,11 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; - return IRQ_NONE; + if (!trigger_machine) + return IRQ_NONE; -trigger_machine: phy_trigger_machine(phydev); return IRQ_HANDLED; @@ -554,6 +555,9 @@ static int dp83822_probe(struct phy_device *phydev) dp83822_of_init(phydev); + if (dp83822->fx_enabled) + phydev->port = PORT_FIBRE; + return 0; } diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c index b30bc142d82e..755220c6451f 100644 --- a/drivers/net/phy/dp83869.c +++ b/drivers/net/phy/dp83869.c @@ -855,6 +855,10 @@ static int dp83869_probe(struct phy_device *phydev) if (ret) return ret; + if (dp83869->mode == DP83869_RGMII_100_BASE || + dp83869->mode == DP83869_RGMII_1000_BASE) + phydev->port = PORT_FIBRE; + return dp83869_config_init(phydev); } diff --git a/drivers/net/phy/dp83tc811.c b/drivers/net/phy/dp83tc811.c index 688fadffb249..7ea32fb77190 100644 --- a/drivers/net/phy/dp83tc811.c +++ b/drivers/net/phy/dp83tc811.c @@ -264,6 +264,7 @@ static int dp83811_config_intr(struct phy_device *phydev) static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) { + bool trigger_machine = false; int irq_status; /* The INT_STAT registers 1, 2 and 3 are holding the interrupt status @@ -279,7 +280,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83811_INT_STAT2); if (irq_status < 0) { @@ -287,7 +288,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; irq_status = phy_read(phydev, MII_DP83811_INT_STAT3); if (irq_status < 0) { @@ -295,11 +296,11 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev) return IRQ_NONE; } if (irq_status & ((irq_status & GENMASK(7, 0)) << 8)) - goto trigger_machine; + trigger_machine = true; - return IRQ_NONE; + if (!trigger_machine) + return IRQ_NONE; -trigger_machine: phy_trigger_machine(phydev); return IRQ_HANDLED; diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c index 0ee23d29c0d4..bde3356a2f86 100644 --- a/drivers/net/phy/lxt.c +++ b/drivers/net/phy/lxt.c @@ -292,6 +292,7 @@ static int lxt973_probe(struct phy_device *phydev) phy_write(phydev, MII_BMCR, val); /* Remember that the port is in fiber mode. */ phydev->priv = lxt973_probe; + phydev->port = PORT_FIBRE; } else { phydev->priv = NULL; } diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 620052c023a5..2afef45d15b1 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -1552,6 +1552,7 @@ static int marvell_read_status_page(struct phy_device *phydev, int page) phydev->asym_pause = 0; phydev->speed = SPEED_UNKNOWN; phydev->duplex = DUPLEX_UNKNOWN; + phydev->port = fiber ? PORT_FIBRE : PORT_TP; if (phydev->autoneg == AUTONEG_ENABLE) err = marvell_read_status_page_an(phydev, fiber, status); diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 1901ba277413..b1bb9b8e1e4e 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -631,6 +631,7 @@ static int mv3310_read_status_10gbaser(struct phy_device *phydev) phydev->link = 1; phydev->speed = SPEED_10000; phydev->duplex = DUPLEX_FULL; + phydev->port = PORT_FIBRE; return 0; } @@ -690,6 +691,7 @@ static int mv3310_read_status_copper(struct phy_device *phydev) phydev->duplex = cssr1 & MV_PCS_CSSR1_DUPLEX_FULL ? DUPLEX_FULL : DUPLEX_HALF; + phydev->port = PORT_TP; phydev->mdix = cssr1 & MV_PCS_CSSR1_MDIX ? ETH_TP_MDI_X : ETH_TP_MDI; diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 54e0d75203da..a6c691938f94 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -341,14 +341,19 @@ static int kszphy_config_init(struct phy_device *phydev) return kszphy_config_reset(phydev); } +static int ksz8041_fiber_mode(struct phy_device *phydev) +{ + struct device_node *of_node = phydev->mdio.dev.of_node; + + return of_property_read_bool(of_node, "micrel,fiber-mode"); +} + static int ksz8041_config_init(struct phy_device *phydev) { __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, }; - struct device_node *of_node = phydev->mdio.dev.of_node; - /* Limit supported and advertised modes in fiber mode */ - if (of_property_read_bool(of_node, "micrel,fiber-mode")) { + if (ksz8041_fiber_mode(phydev)) { phydev->dev_flags |= MICREL_PHY_FXEN; linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Full_BIT, mask); linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT, mask); @@ -1176,6 +1181,9 @@ static int kszphy_probe(struct phy_device *phydev) } } + if (ksz8041_fiber_mode(phydev)) + phydev->port = PORT_FIBRE; + /* Support legacy board-file configuration */ if (phydev->dev_flags & MICREL_PHY_50MHZ_CLK) { priv->rmii_ref_clk_sel = true; @@ -1295,6 +1303,7 @@ static struct phy_driver ksphy_driver[] = { .driver_data = &ksz8081_type, .probe = kszphy_probe, .config_init = ksz8081_config_init, + .soft_reset = genphy_soft_reset, .config_intr = kszphy_config_intr, .handle_interrupt = kszphy_handle_interrupt, .get_sset_count = kszphy_get_sset_count, diff --git a/drivers/net/phy/mscc/Makefile b/drivers/net/phy/mscc/Makefile index d8e22a4eeeff..78d84194f79a 100644 --- a/drivers/net/phy/mscc/Makefile +++ b/drivers/net/phy/mscc/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_MICROSEMI_PHY) := mscc.o mscc-objs := mscc_main.o +mscc-objs += mscc_serdes.o ifdef CONFIG_MACSEC mscc-objs += mscc_macsec.o diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h index 9481bce94c2e..a50235fdf7d9 100644 --- a/drivers/net/phy/mscc/mscc.h +++ b/drivers/net/phy/mscc/mscc.h @@ -102,6 +102,7 @@ enum rgmii_clock_delay { #define PHY_MCB_S6G_READ BIT(30) #define PHY_S6G_PLL5G_CFG0 0x06 +#define PHY_S6G_PLL5G_CFG2 0x08 #define PHY_S6G_LCPLL_CFG 0x11 #define PHY_S6G_PLL_CFG 0x2b #define PHY_S6G_COMMON_CFG 0x2c @@ -121,6 +122,9 @@ enum rgmii_clock_delay { #define PHY_S6G_PLL_FSM_CTRL_DATA_POS 8 #define PHY_S6G_PLL_FSM_ENA_POS 7 +#define PHY_S6G_CFG2_FSM_DIS 1 +#define PHY_S6G_CFG2_FSM_CLK_BP 23 + #define MSCC_EXT_PAGE_ACCESS 31 #define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ #define MSCC_PHY_PAGE_EXTENDED 0x0001 /* Extended registers */ @@ -136,6 +140,10 @@ enum rgmii_clock_delay { #define MSCC_PHY_PAGE_1588 0x1588 /* PTP (1588) */ #define MSCC_PHY_PAGE_TEST 0x2a30 /* Test reg */ #define MSCC_PHY_PAGE_TR 0x52b5 /* Token ring registers */ +#define MSCC_PHY_GPIO_CONTROL_2 14 + +#define MSCC_PHY_COMA_MODE 0x2000 /* input(1) / output(0) */ +#define MSCC_PHY_COMA_OUTPUT 0x1000 /* value to output */ /* Extended Page 1 Registers */ #define MSCC_PHY_CU_MEDIA_CRC_VALID_CNT 18 @@ -335,6 +343,10 @@ enum rgmii_clock_delay { #define VSC8584_REVB 0x0001 #define MSCC_DEV_REV_MASK GENMASK(3, 0) +#define MSCC_ROM_TRAP_SERDES_6G_CFG 0x1E48 +#define MSCC_RAM_TRAP_SERDES_6G_CFG 0x1E4F +#define PATCH_VEC_ZERO_EN 0x0100 + struct reg_val { u16 reg; u32 val; @@ -412,6 +424,22 @@ struct vsc8531_edge_rate_table { }; #endif /* CONFIG_OF_MDIO */ +enum csr_target { + MACRO_CTRL = 0x07, +}; + +u32 vsc85xx_csr_read(struct phy_device *phydev, + enum csr_target target, u32 reg); + +int vsc85xx_csr_write(struct phy_device *phydev, + enum csr_target target, u32 reg, u32 val); + +int phy_base_write(struct phy_device *phydev, u32 regnum, u16 val); +int phy_base_read(struct phy_device *phydev, u32 regnum); +int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb); +int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb); +int vsc8584_cmd(struct phy_device *phydev, u16 val); + #if IS_ENABLED(CONFIG_MACSEC) int vsc8584_macsec_init(struct phy_device *phydev); void vsc8584_handle_macsec_interrupt(struct phy_device *phydev); diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c index 2f2157e3deab..3a7705228ed5 100644 --- a/drivers/net/phy/mscc/mscc_main.c +++ b/drivers/net/phy/mscc/mscc_main.c @@ -17,7 +17,7 @@ #include #include #include - +#include "mscc_serdes.h" #include "mscc.h" static const struct vsc85xx_hw_stat vsc85xx_hw_stats[] = { @@ -689,7 +689,7 @@ static int vsc85xx_eee_init_seq_set(struct phy_device *phydev) } /* phydev->bus->mdio_lock should be locked when using this function */ -static int phy_base_write(struct phy_device *phydev, u32 regnum, u16 val) +int phy_base_write(struct phy_device *phydev, u32 regnum, u16 val) { if (unlikely(!mutex_is_locked(&phydev->mdio.bus->mdio_lock))) { dev_err(&phydev->mdio.dev, "MDIO bus lock not held!\n"); @@ -700,7 +700,7 @@ static int phy_base_write(struct phy_device *phydev, u32 regnum, u16 val) } /* phydev->bus->mdio_lock should be locked when using this function */ -static int phy_base_read(struct phy_device *phydev, u32 regnum) +int phy_base_read(struct phy_device *phydev, u32 regnum) { if (unlikely(!mutex_is_locked(&phydev->mdio.bus->mdio_lock))) { dev_err(&phydev->mdio.dev, "MDIO bus lock not held!\n"); @@ -710,6 +710,113 @@ static int phy_base_read(struct phy_device *phydev, u32 regnum) return __phy_package_read(phydev, regnum); } +u32 vsc85xx_csr_read(struct phy_device *phydev, + enum csr_target target, u32 reg) +{ + unsigned long deadline; + u32 val, val_l, val_h; + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL); + + /* CSR registers are grouped under different Target IDs. + * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and + * MSCC_EXT_PAGE_CSR_CNTL_19 registers. + * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20 + * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19. + */ + + /* Setup the Target ID */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20, + MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2)); + + if ((target >> 2 == 0x1) || (target >> 2 == 0x3)) + /* non-MACsec access */ + target &= 0x3; + else + target = 0; + + /* Trigger CSR Action - Read into the CSR's */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19, + MSCC_PHY_CSR_CNTL_19_CMD | MSCC_PHY_CSR_CNTL_19_READ | + MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) | + MSCC_PHY_CSR_CNTL_19_TARGET(target)); + + /* Wait for register access*/ + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + val = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19); + } while (time_before(jiffies, deadline) && + !(val & MSCC_PHY_CSR_CNTL_19_CMD)); + + if (!(val & MSCC_PHY_CSR_CNTL_19_CMD)) + return 0xffffffff; + + /* Read the Least Significant Word (LSW) (17) */ + val_l = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_17); + + /* Read the Most Significant Word (MSW) (18) */ + val_h = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_18); + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, + MSCC_PHY_PAGE_STANDARD); + + return (val_h << 16) | val_l; +} + +int vsc85xx_csr_write(struct phy_device *phydev, + enum csr_target target, u32 reg, u32 val) +{ + unsigned long deadline; + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL); + + /* CSR registers are grouped under different Target IDs. + * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and + * MSCC_EXT_PAGE_CSR_CNTL_19 registers. + * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20 + * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19. + */ + + /* Setup the Target ID */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20, + MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2)); + + /* Write the Least Significant Word (LSW) (17) */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_17, (u16)val); + + /* Write the Most Significant Word (MSW) (18) */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_18, (u16)(val >> 16)); + + if ((target >> 2 == 0x1) || (target >> 2 == 0x3)) + /* non-MACsec access */ + target &= 0x3; + else + target = 0; + + /* Trigger CSR Action - Write into the CSR's */ + phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19, + MSCC_PHY_CSR_CNTL_19_CMD | + MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) | + MSCC_PHY_CSR_CNTL_19_TARGET(target)); + + /* Wait for register access */ + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + val = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19); + } while (time_before(jiffies, deadline) && + !(val & MSCC_PHY_CSR_CNTL_19_CMD)); + + if (!(val & MSCC_PHY_CSR_CNTL_19_CMD)) + return -ETIMEDOUT; + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, + MSCC_PHY_PAGE_STANDARD); + + return 0; +} + /* bus->mdio_lock should be locked when using this function */ static void vsc8584_csr_write(struct phy_device *phydev, u16 addr, u32 val) { @@ -719,7 +826,7 @@ static void vsc8584_csr_write(struct phy_device *phydev, u16 addr, u32 val) } /* bus->mdio_lock should be locked when using this function */ -static int vsc8584_cmd(struct phy_device *phydev, u16 val) +int vsc8584_cmd(struct phy_device *phydev, u16 val) { unsigned long deadline; u16 reg_val; @@ -1131,6 +1238,92 @@ static int vsc8574_config_pre_init(struct phy_device *phydev) return ret; } +/* Access LCPLL Cfg_2 */ +static void vsc8584_pll5g_cfg2_wr(struct phy_device *phydev, + bool disable_fsm) +{ + u32 rd_dat; + + rd_dat = vsc85xx_csr_read(phydev, MACRO_CTRL, PHY_S6G_PLL5G_CFG2); + rd_dat &= ~BIT(PHY_S6G_CFG2_FSM_DIS); + rd_dat |= (disable_fsm << PHY_S6G_CFG2_FSM_DIS); + vsc85xx_csr_write(phydev, MACRO_CTRL, PHY_S6G_PLL5G_CFG2, rd_dat); +} + +/* trigger a read to the spcified MCB */ +static int vsc8584_mcb_rd_trig(struct phy_device *phydev, + u32 mcb_reg_addr, u8 mcb_slave_num) +{ + u32 rd_dat = 0; + + /* read MCB */ + vsc85xx_csr_write(phydev, MACRO_CTRL, mcb_reg_addr, + (0x40000000 | (1L << mcb_slave_num))); + + return read_poll_timeout(vsc85xx_csr_read, rd_dat, + !(rd_dat & 0x40000000), + 4000, 200000, 0, + phydev, MACRO_CTRL, mcb_reg_addr); +} + +/* trigger a write to the spcified MCB */ +static int vsc8584_mcb_wr_trig(struct phy_device *phydev, + u32 mcb_reg_addr, + u8 mcb_slave_num) +{ + u32 rd_dat = 0; + + /* write back MCB */ + vsc85xx_csr_write(phydev, MACRO_CTRL, mcb_reg_addr, + (0x80000000 | (1L << mcb_slave_num))); + + return read_poll_timeout(vsc85xx_csr_read, rd_dat, + !(rd_dat & 0x80000000), + 4000, 200000, 0, + phydev, MACRO_CTRL, mcb_reg_addr); +} + +/* Sequence to Reset LCPLL for the VIPER and ELISE PHY */ +static int vsc8584_pll5g_reset(struct phy_device *phydev) +{ + bool dis_fsm; + int ret = 0; + + ret = vsc8584_mcb_rd_trig(phydev, 0x11, 0); + if (ret < 0) + goto done; + dis_fsm = 1; + + /* Reset LCPLL */ + vsc8584_pll5g_cfg2_wr(phydev, dis_fsm); + + /* write back LCPLL MCB */ + ret = vsc8584_mcb_wr_trig(phydev, 0x11, 0); + if (ret < 0) + goto done; + + /* 10 mSec sleep while LCPLL is hold in reset */ + usleep_range(10000, 20000); + + /* read LCPLL MCB into CSRs */ + ret = vsc8584_mcb_rd_trig(phydev, 0x11, 0); + if (ret < 0) + goto done; + dis_fsm = 0; + + /* Release the Reset of LCPLL */ + vsc8584_pll5g_cfg2_wr(phydev, dis_fsm); + + /* write back LCPLL MCB */ + ret = vsc8584_mcb_wr_trig(phydev, 0x11, 0); + if (ret < 0) + goto done; + + usleep_range(110000, 200000); +done: + return ret; +} + /* bus->mdio_lock should be locked when using this function */ static int vsc8584_config_pre_init(struct phy_device *phydev) { @@ -1323,6 +1516,21 @@ static void vsc8584_get_base_addr(struct phy_device *phydev) vsc8531->addr = addr; } +static void vsc85xx_coma_mode_release(struct phy_device *phydev) +{ + /* The coma mode (pin or reg) provides an optional feature that + * may be used to control when the PHYs become active. + * Alternatively the COMA_MODE pin may be connected low + * so that the PHYs are fully active once out of reset. + */ + + /* Enable output (mode=0) and write zero to it */ + vsc85xx_phy_write_page(phydev, MSCC_PHY_PAGE_EXTENDED_GPIO); + __phy_modify(phydev, MSCC_PHY_GPIO_CONTROL_2, + MSCC_PHY_COMA_MODE | MSCC_PHY_COMA_OUTPUT, 0); + vsc85xx_phy_write_page(phydev, MSCC_PHY_PAGE_STANDARD); +} + static int vsc8584_config_init(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; @@ -1541,6 +1749,100 @@ static int vsc85xx_config_init(struct phy_device *phydev) return 0; } +static int __phy_write_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb, + u32 op) +{ + unsigned long deadline; + u32 val; + int ret; + + ret = vsc85xx_csr_write(phydev, PHY_MCB_TARGET, reg, + op | (1 << mcb)); + if (ret) + return -EINVAL; + + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + val = vsc85xx_csr_read(phydev, PHY_MCB_TARGET, reg); + + if (val == 0xffffffff) + return -EIO; + + } while (time_before(jiffies, deadline) && (val & op)); + + if (val & op) + return -ETIMEDOUT; + + return 0; +} + +/* Trigger a read to the specified MCB */ +int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) +{ + return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_READ); +} + +/* Trigger a write to the specified MCB */ +int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) +{ + return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_WRITE); +} + +static int vsc8514_config_host_serdes(struct phy_device *phydev) +{ + int ret; + u16 val; + + ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, + MSCC_PHY_PAGE_EXTENDED_GPIO); + if (ret) + return ret; + + val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK); + val &= ~MAC_CFG_MASK; + val |= MAC_CFG_QSGMII; + ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val); + if (ret) + return ret; + + ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, + MSCC_PHY_PAGE_STANDARD); + if (ret) + return ret; + + ret = vsc8584_cmd(phydev, PROC_CMD_NOP); + if (ret) + return ret; + + ret = vsc8584_cmd(phydev, + PROC_CMD_MCB_ACCESS_MAC_CONF | + PROC_CMD_RST_CONF_PORT | + PROC_CMD_READ_MOD_WRITE_PORT | PROC_CMD_QSGMII_MAC); + if (ret) { + dev_err(&phydev->mdio.dev, "%s: QSGMII error: %d\n", + __func__, ret); + return ret; + } + + /* Apply 6G SerDes FOJI Algorithm + * Initial condition requirement: + * 1. hold 8051 in reset + * 2. disable patch vector 0, in order to allow IB cal poll during FoJi + * 3. deassert 8051 reset after change patch vector status + * 4. proceed with FoJi (vsc85xx_sd6g_config_v2) + */ + vsc8584_micro_assert_reset(phydev); + val = phy_base_read(phydev, MSCC_INT_MEM_CNTL); + /* clear bit 8, to disable patch vector 0 */ + val &= ~PATCH_VEC_ZERO_EN; + ret = phy_base_write(phydev, MSCC_INT_MEM_CNTL, val); + /* Enable 8051 clock, don't set patch present, disable PRAM clock override */ + vsc8584_micro_deassert_reset(phydev, false); + + return vsc85xx_sd6g_config_v2(phydev); +} + static int vsc8514_config_pre_init(struct phy_device *phydev) { /* These are the settings to override the silicon default @@ -1569,8 +1871,16 @@ static int vsc8514_config_pre_init(struct phy_device *phydev) {0x16b2, 0x00007000}, {0x16b4, 0x00000814}, }; + struct device *dev = &phydev->mdio.dev; unsigned int i; u16 reg; + int ret; + + ret = vsc8584_pll5g_reset(phydev); + if (ret < 0) { + dev_err(dev, "failed LCPLL reset, ret: %d\n", ret); + return ret; + } phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); @@ -1602,151 +1912,48 @@ static int vsc8514_config_pre_init(struct phy_device *phydev) reg &= ~SMI_BROADCAST_WR_EN; phy_base_write(phydev, MSCC_PHY_EXT_CNTL_STATUS, reg); - return 0; -} - -static u32 vsc85xx_csr_ctrl_phy_read(struct phy_device *phydev, - u32 target, u32 reg) -{ - unsigned long deadline; - u32 val, val_l, val_h; - - phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL); - - /* CSR registers are grouped under different Target IDs. - * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and - * MSCC_EXT_PAGE_CSR_CNTL_19 registers. - * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20 - * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19. - */ - - /* Setup the Target ID */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20, - MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2)); - - /* Trigger CSR Action - Read into the CSR's */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19, - MSCC_PHY_CSR_CNTL_19_CMD | MSCC_PHY_CSR_CNTL_19_READ | - MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) | - MSCC_PHY_CSR_CNTL_19_TARGET(target & 0x3)); - - /* Wait for register access*/ - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - usleep_range(500, 1000); - val = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19); - } while (time_before(jiffies, deadline) && - !(val & MSCC_PHY_CSR_CNTL_19_CMD)); - - if (!(val & MSCC_PHY_CSR_CNTL_19_CMD)) - return 0xffffffff; - - /* Read the Least Significant Word (LSW) (17) */ - val_l = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_17); - - /* Read the Most Significant Word (MSW) (18) */ - val_h = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_18); - - phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_STANDARD); - - return (val_h << 16) | val_l; -} - -static int vsc85xx_csr_ctrl_phy_write(struct phy_device *phydev, - u32 target, u32 reg, u32 val) -{ - unsigned long deadline; - - phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_CSR_CNTL); - - /* CSR registers are grouped under different Target IDs. - * 6-bit Target_ID is split between MSCC_EXT_PAGE_CSR_CNTL_20 and - * MSCC_EXT_PAGE_CSR_CNTL_19 registers. - * Target_ID[5:2] maps to bits[3:0] of MSCC_EXT_PAGE_CSR_CNTL_20 - * and Target_ID[1:0] maps to bits[13:12] of MSCC_EXT_PAGE_CSR_CNTL_19. + /* Add pre-patching commands to: + * 1. enable 8051 clock, operate 8051 clock at 125 MHz + * instead of HW default 62.5MHz + * 2. write patch vector 0, to skip IB cal polling executed + * as part of the 0x80E0 ROM command */ + vsc8584_micro_deassert_reset(phydev, false); - /* Setup the Target ID */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_20, - MSCC_PHY_CSR_CNTL_20_TARGET(target >> 2)); - - /* Write the Least Significant Word (LSW) (17) */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_17, (u16)val); - - /* Write the Most Significant Word (MSW) (18) */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_18, (u16)(val >> 16)); - - /* Trigger CSR Action - Write into the CSR's */ - phy_base_write(phydev, MSCC_EXT_PAGE_CSR_CNTL_19, - MSCC_PHY_CSR_CNTL_19_CMD | - MSCC_PHY_CSR_CNTL_19_REG_ADDR(reg) | - MSCC_PHY_CSR_CNTL_19_TARGET(target & 0x3)); - - /* Wait for register access */ - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - usleep_range(500, 1000); - val = phy_base_read(phydev, MSCC_EXT_PAGE_CSR_CNTL_19); - } while (time_before(jiffies, deadline) && - !(val & MSCC_PHY_CSR_CNTL_19_CMD)); - - if (!(val & MSCC_PHY_CSR_CNTL_19_CMD)) - return -ETIMEDOUT; - + vsc8584_micro_assert_reset(phydev); phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_STANDARD); - - return 0; -} - -static int __phy_write_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb, - u32 op) -{ - unsigned long deadline; - u32 val; - int ret; - - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, reg, - op | (1 << mcb)); + MSCC_PHY_PAGE_EXTENDED_GPIO); + /* ROM address to trap, for patch vector 0 */ + reg = MSCC_ROM_TRAP_SERDES_6G_CFG; + ret = phy_base_write(phydev, MSCC_TRAP_ROM_ADDR(1), reg); if (ret) - return -EINVAL; - - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - usleep_range(500, 1000); - val = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET, reg); - - if (val == 0xffffffff) - return -EIO; - - } while (time_before(jiffies, deadline) && (val & op)); - - if (val & op) - return -ETIMEDOUT; - - return 0; -} - -/* Trigger a read to the specified MCB */ -static int phy_update_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) -{ - return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_READ); -} + goto err; + /* RAM address to jump to, when patch vector 0 enabled */ + reg = MSCC_RAM_TRAP_SERDES_6G_CFG; + ret = phy_base_write(phydev, MSCC_PATCH_RAM_ADDR(1), reg); + if (ret) + goto err; + reg = phy_base_read(phydev, MSCC_INT_MEM_CNTL); + reg |= PATCH_VEC_ZERO_EN; /* bit 8, enable patch vector 0 */ + ret = phy_base_write(phydev, MSCC_INT_MEM_CNTL, reg); + if (ret) + goto err; -/* Trigger a write to the specified MCB */ -static int phy_commit_mcb_s6g(struct phy_device *phydev, u32 reg, u8 mcb) -{ - return __phy_write_mcb_s6g(phydev, reg, mcb, PHY_MCB_S6G_WRITE); + /* Enable 8051 clock, don't set patch present + * yet, disable PRAM clock override + */ + vsc8584_micro_deassert_reset(phydev, false); + return ret; + err: + /* restore 8051 and bail w error */ + vsc8584_micro_deassert_reset(phydev, false); + return ret; } static int vsc8514_config_init(struct phy_device *phydev) { struct vsc8531_private *vsc8531 = phydev->priv; - unsigned long deadline; int ret, i; - u16 val; - u32 reg; phydev->mdix_ctrl = ETH_TP_MDI_AUTO; @@ -1763,123 +1970,14 @@ static int vsc8514_config_init(struct phy_device *phydev) * do the correct init sequence for all PHYs that are package-critical * in this pre-init function. */ - if (phy_package_init_once(phydev)) - vsc8514_config_pre_init(phydev); - - ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_EXTENDED_GPIO); - if (ret) - goto err; - - val = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK); - - val &= ~MAC_CFG_MASK; - val |= MAC_CFG_QSGMII; - ret = phy_base_write(phydev, MSCC_PHY_MAC_CFG_FASTLINK, val); - if (ret) - goto err; - - ret = phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, - MSCC_PHY_PAGE_STANDARD); - if (ret) - goto err; - - ret = vsc8584_cmd(phydev, - PROC_CMD_MCB_ACCESS_MAC_CONF | - PROC_CMD_RST_CONF_PORT | - PROC_CMD_READ_MOD_WRITE_PORT | PROC_CMD_QSGMII_MAC); - if (ret) - goto err; - - /* 6g mcb */ - phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); - /* lcpll mcb */ - phy_update_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0); - /* pll5gcfg0 */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_PLL5G_CFG0, 0x7036f145); - if (ret) - goto err; - - phy_commit_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0); - /* pllcfg */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_PLL_CFG, - (3 << PHY_S6G_PLL_ENA_OFFS_POS) | - (120 << PHY_S6G_PLL_FSM_CTRL_DATA_POS) - | (0 << PHY_S6G_PLL_FSM_ENA_POS)); - if (ret) - goto err; - - /* commoncfg */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_COMMON_CFG, - (0 << PHY_S6G_SYS_RST_POS) | - (0 << PHY_S6G_ENA_LANE_POS) | - (0 << PHY_S6G_ENA_LOOP_POS) | - (0 << PHY_S6G_QRATE_POS) | - (3 << PHY_S6G_IF_MODE_POS)); - if (ret) - goto err; - - /* misccfg */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_MISC_CFG, 1); - if (ret) - goto err; - - /* gpcfg */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_GPC_CFG, 768); - if (ret) - goto err; - - phy_commit_mcb_s6g(phydev, PHY_S6G_DFT_CFG2, 0); - - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - usleep_range(500, 1000); - phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, - 0); /* read 6G MCB into CSRs */ - reg = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET, - PHY_S6G_PLL_STATUS); - if (reg == 0xffffffff) { - phy_unlock_mdio_bus(phydev); - return -EIO; - } - - } while (time_before(jiffies, deadline) && (reg & BIT(12))); - - if (reg & BIT(12)) { - phy_unlock_mdio_bus(phydev); - return -ETIMEDOUT; - } - - /* misccfg */ - ret = vsc85xx_csr_ctrl_phy_write(phydev, PHY_MCB_TARGET, - PHY_S6G_MISC_CFG, 0); - if (ret) - goto err; - - phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); - - deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); - do { - usleep_range(500, 1000); - phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, - 0); /* read 6G MCB into CSRs */ - reg = vsc85xx_csr_ctrl_phy_read(phydev, PHY_MCB_TARGET, - PHY_S6G_IB_STATUS0); - if (reg == 0xffffffff) { - phy_unlock_mdio_bus(phydev); - return -EIO; - } - - } while (time_before(jiffies, deadline) && !(reg & BIT(8))); - - if (!(reg & BIT(8))) { - phy_unlock_mdio_bus(phydev); - return -ETIMEDOUT; + if (phy_package_init_once(phydev)) { + ret = vsc8514_config_pre_init(phydev); + if (ret) + goto err; + ret = vsc8514_config_host_serdes(phydev); + if (ret) + goto err; + vsc85xx_coma_mode_release(phydev); } phy_unlock_mdio_bus(phydev); diff --git a/drivers/net/phy/mscc/mscc_serdes.c b/drivers/net/phy/mscc/mscc_serdes.c new file mode 100644 index 000000000000..b3e854f53d67 --- /dev/null +++ b/drivers/net/phy/mscc/mscc_serdes.c @@ -0,0 +1,650 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* + * Driver for Microsemi VSC85xx PHYs + * + * Author: Bjarni Jonasson + * License: Dual MIT/GPL + * Copyright (c) 2021 Microsemi Corporation + */ + +#include +#include "mscc_serdes.h" +#include "mscc.h" + +static int pll5g_detune(struct phy_device *phydev) +{ + u32 rd_dat; + int ret; + + rd_dat = vsc85xx_csr_read(phydev, MACRO_CTRL, PHY_S6G_PLL5G_CFG2); + rd_dat &= ~PHY_S6G_PLL5G_CFG2_GAIN_MASK; + rd_dat |= PHY_S6G_PLL5G_CFG2_ENA_GAIN; + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_PLL5G_CFG2, rd_dat); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int pll5g_tune(struct phy_device *phydev) +{ + u32 rd_dat; + int ret; + + rd_dat = vsc85xx_csr_read(phydev, MACRO_CTRL, PHY_S6G_PLL5G_CFG2); + rd_dat &= ~PHY_S6G_PLL5G_CFG2_ENA_GAIN; + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_PLL5G_CFG2, rd_dat); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_pll_cfg_wr(struct phy_device *phydev, + const u32 pll_ena_offs, + const u32 pll_fsm_ctrl_data, + const u32 pll_fsm_ena) +{ + int ret; + + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_PLL_CFG, + (pll_fsm_ena << PHY_S6G_PLL_ENA_OFFS_POS) | + (pll_fsm_ctrl_data << PHY_S6G_PLL_FSM_CTRL_DATA_POS) | + (pll_ena_offs << PHY_S6G_PLL_FSM_ENA_POS)); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_common_cfg_wr(struct phy_device *phydev, + const u32 sys_rst, + const u32 ena_lane, + const u32 ena_loop, + const u32 qrate, + const u32 if_mode, + const u32 pwd_tx) +{ + /* ena_loop = 8 for eloop */ + /* = 4 for floop */ + /* = 2 for iloop */ + /* = 1 for ploop */ + /* qrate = 1 for SGMII, 0 for QSGMII */ + /* if_mode = 1 for SGMII, 3 for QSGMII */ + + int ret; + + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_COMMON_CFG, + (sys_rst << PHY_S6G_SYS_RST_POS) | + (ena_lane << PHY_S6G_ENA_LANE_POS) | + (ena_loop << PHY_S6G_ENA_LOOP_POS) | + (qrate << PHY_S6G_QRATE_POS) | + (if_mode << PHY_S6G_IF_MODE_POS)); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_des_cfg_wr(struct phy_device *phydev, + const u32 des_phy_ctrl, + const u32 des_mbtr_ctrl, + const u32 des_bw_hyst, + const u32 des_bw_ana, + const u32 des_cpmd_sel) +{ + u32 reg_val; + int ret; + + /* configurable terms */ + reg_val = (des_phy_ctrl << PHY_S6G_DES_PHY_CTRL_POS) | + (des_mbtr_ctrl << PHY_S6G_DES_MBTR_CTRL_POS) | + (des_cpmd_sel << PHY_S6G_DES_CPMD_SEL_POS) | + (des_bw_hyst << PHY_S6G_DES_BW_HYST_POS) | + (des_bw_ana << PHY_S6G_DES_BW_ANA_POS); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_DES_CFG, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_ib_cfg0_wr(struct phy_device *phydev, + const u32 ib_rtrm_adj, + const u32 ib_sig_det_clk_sel, + const u32 ib_reg_pat_sel_offset, + const u32 ib_cal_ena) +{ + u32 base_val; + u32 reg_val; + int ret; + + /* constant terms */ + base_val = 0x60a85837; + /* configurable terms */ + reg_val = base_val | (ib_rtrm_adj << 25) | + (ib_sig_det_clk_sel << 16) | + (ib_reg_pat_sel_offset << 8) | + (ib_cal_ena << 3); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_CFG0, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_ib_cfg1_wr(struct phy_device *phydev, + const u32 ib_tjtag, + const u32 ib_tsdet, + const u32 ib_scaly, + const u32 ib_frc_offset, + const u32 ib_filt_offset) +{ + u32 ib_filt_val; + u32 reg_val = 0; + int ret; + + /* constant terms */ + ib_filt_val = 0xe0; + /* configurable terms */ + reg_val = (ib_tjtag << 17) + (ib_tsdet << 12) + (ib_scaly << 8) + + ib_filt_val + (ib_filt_offset << 4) + (ib_frc_offset << 0); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_CFG1, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_ib_cfg2_wr(struct phy_device *phydev, + const u32 ib_tinfv, + const u32 ib_tcalv, + const u32 ib_ureg) +{ + u32 ib_cfg2_val; + u32 base_val; + int ret; + + /* constant terms */ + base_val = 0x0f878010; + /* configurable terms */ + ib_cfg2_val = base_val | ((ib_tinfv) << 28) | ((ib_tcalv) << 5) | + (ib_ureg << 0); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_CFG2, + ib_cfg2_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_ib_cfg3_wr(struct phy_device *phydev, + const u32 ib_ini_hp, + const u32 ib_ini_mid, + const u32 ib_ini_lp, + const u32 ib_ini_offset) +{ + u32 reg_val; + int ret; + + reg_val = (ib_ini_hp << 24) + (ib_ini_mid << 16) + + (ib_ini_lp << 8) + (ib_ini_offset << 0); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_CFG3, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_ib_cfg4_wr(struct phy_device *phydev, + const u32 ib_max_hp, + const u32 ib_max_mid, + const u32 ib_max_lp, + const u32 ib_max_offset) +{ + u32 reg_val; + int ret; + + reg_val = (ib_max_hp << 24) + (ib_max_mid << 16) + + (ib_max_lp << 8) + (ib_max_offset << 0); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_CFG4, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_misc_cfg_wr(struct phy_device *phydev, + const u32 lane_rst) +{ + int ret; + + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_MISC_CFG, + lane_rst); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_gp_cfg_wr(struct phy_device *phydev, const u32 gp_cfg_val) +{ + int ret; + + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_GP_CFG, + gp_cfg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_dft_cfg2_wr(struct phy_device *phydev, + const u32 rx_ji_ampl, + const u32 rx_step_freq, + const u32 rx_ji_ena, + const u32 rx_waveform_sel, + const u32 rx_freqoff_dir, + const u32 rx_freqoff_ena) +{ + u32 reg_val; + int ret; + + /* configurable terms */ + reg_val = (rx_ji_ampl << 8) | (rx_step_freq << 4) | + (rx_ji_ena << 3) | (rx_waveform_sel << 2) | + (rx_freqoff_dir << 1) | rx_freqoff_ena; + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_IB_DFT_CFG2, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +static int vsc85xx_sd6g_dft_cfg0_wr(struct phy_device *phydev, + const u32 prbs_sel, + const u32 test_mode, + const u32 rx_dft_ena) +{ + u32 reg_val; + int ret; + + /* configurable terms */ + reg_val = (prbs_sel << 20) | (test_mode << 16) | (rx_dft_ena << 2); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_DFT_CFG0, + reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +/* Access LCPLL Cfg_0 */ +static int vsc85xx_pll5g_cfg0_wr(struct phy_device *phydev, + const u32 selbgv820) +{ + u32 base_val; + u32 reg_val; + int ret; + + /* constant terms */ + base_val = 0x7036f145; + /* configurable terms */ + reg_val = base_val | (selbgv820 << 23); + ret = vsc85xx_csr_write(phydev, MACRO_CTRL, + PHY_S6G_PLL5G_CFG0, reg_val); + if (ret) + dev_err(&phydev->mdio.dev, "%s: write error\n", __func__); + return ret; +} + +int vsc85xx_sd6g_config_v2(struct phy_device *phydev) +{ + u32 ib_sig_det_clk_sel_cal = 0; + u32 ib_sig_det_clk_sel_mm = 7; + u32 pll_fsm_ctrl_data = 60; + unsigned long deadline; + u32 des_bw_ana_val = 3; + u32 ib_tsdet_cal = 16; + u32 ib_tsdet_mm = 5; + u32 ib_rtrm_adj; + u32 if_mode = 1; + u32 gp_iter = 5; + u32 val32 = 0; + u32 qrate = 1; + u32 iter; + int val = 0; + int ret; + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); + + /* Detune/Unlock LCPLL */ + ret = pll5g_detune(phydev); + if (ret) + return ret; + + /* 0. Reset RCPLL */ + ret = vsc85xx_sd6g_pll_cfg_wr(phydev, 3, pll_fsm_ctrl_data, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 0, 0, 0, qrate, if_mode, 0); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_des_cfg_wr(phydev, 6, 2, 5, des_bw_ana_val, 0); + if (ret) + return ret; + + /* 1. Configure sd6g for SGMII prior to sd6g_IB_CAL */ + ib_rtrm_adj = 13; + ret = vsc85xx_sd6g_ib_cfg0_wr(phydev, ib_rtrm_adj, ib_sig_det_clk_sel_mm, 0, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_mm, 15, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg2_wr(phydev, 3, 13, 5); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg3_wr(phydev, 0, 31, 1, 31); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg4_wr(phydev, 63, 63, 2, 63); + if (ret) + return ret; + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 1, 1, 0, qrate, if_mode, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_misc_cfg_wr(phydev, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 2. Start rcpll_fsm */ + ret = vsc85xx_sd6g_pll_cfg_wr(phydev, 3, pll_fsm_ctrl_data, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + ret = phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + val32 = vsc85xx_csr_read(phydev, MACRO_CTRL, + PHY_S6G_PLL_STATUS); + /* wait for bit 12 to clear */ + } while (time_before(jiffies, deadline) && (val32 & BIT(12))); + + if (val32 & BIT(12)) + return -ETIMEDOUT; + + /* 4. Release digital reset and disable transmitter */ + ret = vsc85xx_sd6g_misc_cfg_wr(phydev, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 1, 1, 0, qrate, if_mode, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 5. Apply a frequency offset on RX-side (using internal FoJi logic) */ + ret = vsc85xx_sd6g_gp_cfg_wr(phydev, 768); + if (ret) + return ret; + ret = vsc85xx_sd6g_dft_cfg2_wr(phydev, 0, 2, 0, 0, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_dft_cfg0_wr(phydev, 0, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_des_cfg_wr(phydev, 6, 2, 5, des_bw_ana_val, 2); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 6. Prepare required settings for IBCAL */ + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_cal, 15, 1, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg0_wr(phydev, ib_rtrm_adj, ib_sig_det_clk_sel_cal, 0, 0); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 7. Start IB_CAL */ + ret = vsc85xx_sd6g_ib_cfg0_wr(phydev, ib_rtrm_adj, + ib_sig_det_clk_sel_cal, 0, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + /* 11 cycles (for ViperA) or 5 cycles (for ViperB & Elise) w/ SW clock */ + for (iter = 0; iter < gp_iter; iter++) { + /* set gp(0) */ + ret = vsc85xx_sd6g_gp_cfg_wr(phydev, 769); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + /* clear gp(0) */ + ret = vsc85xx_sd6g_gp_cfg_wr(phydev, 768); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + } + + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_cal, 15, 1, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_cal, 15, 0, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 8. Wait for IB cal to complete */ + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + ret = phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + val32 = vsc85xx_csr_read(phydev, MACRO_CTRL, + PHY_S6G_IB_STATUS0); + /* wait for bit 8 to set */ + } while (time_before(jiffies, deadline) && (~val32 & BIT(8))); + + if (~val32 & BIT(8)) + return -ETIMEDOUT; + + /* 9. Restore cfg values for mission mode */ + ret = vsc85xx_sd6g_ib_cfg0_wr(phydev, ib_rtrm_adj, ib_sig_det_clk_sel_mm, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_mm, 15, 0, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 10. Re-enable transmitter */ + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 1, 1, 0, qrate, if_mode, 0); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 11. Disable frequency offset generation (using internal FoJi logic) */ + ret = vsc85xx_sd6g_dft_cfg2_wr(phydev, 0, 0, 0, 0, 0, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_dft_cfg0_wr(phydev, 0, 0, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_des_cfg_wr(phydev, 6, 2, 5, des_bw_ana_val, 0); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* Tune/Re-lock LCPLL */ + ret = pll5g_tune(phydev); + if (ret) + return ret; + + /* 12. Configure for Final Configuration and Settings */ + /* a. Reset RCPLL */ + ret = vsc85xx_sd6g_pll_cfg_wr(phydev, 3, pll_fsm_ctrl_data, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 0, 1, 0, qrate, if_mode, 0); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* b. Configure sd6g for desired operating mode */ + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED_GPIO); + ret = phy_base_read(phydev, MSCC_PHY_MAC_CFG_FASTLINK); + if ((ret & MAC_CFG_MASK) == MAC_CFG_QSGMII) { + /* QSGMII */ + pll_fsm_ctrl_data = 120; + qrate = 0; + if_mode = 3; + des_bw_ana_val = 5; + val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | + PROC_CMD_READ_MOD_WRITE_PORT | PROC_CMD_QSGMII_MAC; + + ret = vsc8584_cmd(phydev, val); + if (ret) { + dev_err(&phydev->mdio.dev, "%s: QSGMII error: %d\n", + __func__, ret); + return ret; + } + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); + } else if ((ret & MAC_CFG_MASK) == MAC_CFG_SGMII) { + /* SGMII */ + pll_fsm_ctrl_data = 60; + qrate = 1; + if_mode = 1; + des_bw_ana_val = 3; + + val = PROC_CMD_MCB_ACCESS_MAC_CONF | PROC_CMD_RST_CONF_PORT | + PROC_CMD_READ_MOD_WRITE_PORT | PROC_CMD_SGMII_MAC; + + ret = vsc8584_cmd(phydev, val); + if (ret) { + dev_err(&phydev->mdio.dev, "%s: SGMII error: %d\n", + __func__, ret); + return ret; + } + + phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD); + } else { + dev_err(&phydev->mdio.dev, "%s: invalid mac_if: %x\n", + __func__, ret); + } + + ret = phy_update_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0); + if (ret) + return ret; + ret = phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + ret = vsc85xx_pll5g_cfg0_wr(phydev, 4); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_S6G_LCPLL_CFG, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_des_cfg_wr(phydev, 6, 2, 5, des_bw_ana_val, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg0_wr(phydev, ib_rtrm_adj, ib_sig_det_clk_sel_mm, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg1_wr(phydev, 8, ib_tsdet_mm, 15, 0, 1); + if (ret) + return ret; + ret = vsc85xx_sd6g_common_cfg_wr(phydev, 1, 1, 0, qrate, if_mode, 0); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg2_wr(phydev, 3, 13, 5); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg3_wr(phydev, 0, 31, 1, 31); + if (ret) + return ret; + ret = vsc85xx_sd6g_ib_cfg4_wr(phydev, 63, 63, 2, 63); + if (ret) + return ret; + ret = vsc85xx_sd6g_misc_cfg_wr(phydev, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 13. Start rcpll_fsm */ + ret = vsc85xx_sd6g_pll_cfg_wr(phydev, 3, pll_fsm_ctrl_data, 1); + if (ret) + return ret; + ret = phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + + /* 14. Wait for PLL cal to complete */ + deadline = jiffies + msecs_to_jiffies(PROC_CMD_NCOMPLETED_TIMEOUT_MS); + do { + usleep_range(500, 1000); + ret = phy_update_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); + if (ret) + return ret; + val32 = vsc85xx_csr_read(phydev, MACRO_CTRL, + PHY_S6G_PLL_STATUS); + /* wait for bit 12 to clear */ + } while (time_before(jiffies, deadline) && (val32 & BIT(12))); + + if (val32 & BIT(12)) + return -ETIMEDOUT; + + /* release lane reset */ + ret = vsc85xx_sd6g_misc_cfg_wr(phydev, 0); + if (ret) + return ret; + + return phy_commit_mcb_s6g(phydev, PHY_MCB_S6G_CFG, 0); +} diff --git a/drivers/net/phy/mscc/mscc_serdes.h b/drivers/net/phy/mscc/mscc_serdes.h new file mode 100644 index 000000000000..2a6371322af9 --- /dev/null +++ b/drivers/net/phy/mscc/mscc_serdes.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */ +/* + * Driver for Microsemi VSC85xx PHYs + * + * Copyright (c) 2021 Microsemi Corporation + */ + +#ifndef _MSCC_SERDES_PHY_H_ +#define _MSCC_SERDES_PHY_H_ + +#define PHY_S6G_PLL5G_CFG2_GAIN_MASK GENMASK(9, 5) +#define PHY_S6G_PLL5G_CFG2_ENA_GAIN 1 + +#define PHY_S6G_DES_PHY_CTRL_POS 13 +#define PHY_S6G_DES_MBTR_CTRL_POS 10 +#define PHY_S6G_DES_CPMD_SEL_POS 8 +#define PHY_S6G_DES_BW_HYST_POS 5 +#define PHY_S6G_DES_BW_ANA_POS 1 +#define PHY_S6G_DES_CFG 0x21 +#define PHY_S6G_IB_CFG0 0x22 +#define PHY_S6G_IB_CFG1 0x23 +#define PHY_S6G_IB_CFG2 0x24 +#define PHY_S6G_IB_CFG3 0x25 +#define PHY_S6G_IB_CFG4 0x26 +#define PHY_S6G_GP_CFG 0x2E +#define PHY_S6G_DFT_CFG0 0x35 +#define PHY_S6G_IB_DFT_CFG2 0x37 + +int vsc85xx_sd6g_config_v2(struct phy_device *phydev); + +#endif /* _MSCC_PHY_SERDES_H_ */ diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index 45f75533c47c..c93c295db3dc 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -276,14 +276,16 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, phydev->autoneg = autoneg; - phydev->speed = speed; + if (autoneg == AUTONEG_DISABLE) { + phydev->speed = speed; + phydev->duplex = duplex; + } linkmode_copy(phydev->advertising, advertising); linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT, phydev->advertising, autoneg == AUTONEG_ENABLE); - phydev->duplex = duplex; phydev->master_slave_set = cmd->base.master_slave_cfg; phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl; @@ -308,7 +310,7 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev, if (phydev->interface == PHY_INTERFACE_MODE_MOCA) cmd->base.port = PORT_BNC; else - cmd->base.port = PORT_MII; + cmd->base.port = phydev->port; cmd->base.transceiver = phy_is_internal(phydev) ? XCVR_INTERNAL : XCVR_EXTERNAL; cmd->base.phy_address = phydev->mdio.addr; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 80c2e646c093..d2fd54e4c612 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -230,7 +230,6 @@ static struct phy_driver genphy_driver; static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); -#ifdef CONFIG_PM static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->mdio.dev.driver; @@ -270,7 +269,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) return !phydev->suspended; } -static int mdio_bus_phy_suspend(struct device *dev) +static __maybe_unused int mdio_bus_phy_suspend(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); @@ -290,7 +289,7 @@ static int mdio_bus_phy_suspend(struct device *dev) return phy_suspend(phydev); } -static int mdio_bus_phy_resume(struct device *dev) +static __maybe_unused int mdio_bus_phy_resume(struct device *dev) { struct phy_device *phydev = to_phy_device(dev); int ret; @@ -300,51 +299,22 @@ static int mdio_bus_phy_resume(struct device *dev) phydev->suspended_by_mdio_bus = 0; - ret = phy_resume(phydev); + ret = phy_init_hw(phydev); if (ret < 0) return ret; -no_resume: - if (phydev->attached_dev && phydev->adjust_link) - phy_start_machine(phydev); - - return 0; -} - -static int mdio_bus_phy_restore(struct device *dev) -{ - struct phy_device *phydev = to_phy_device(dev); - struct net_device *netdev = phydev->attached_dev; - int ret; - - if (!netdev) - return 0; - - ret = phy_init_hw(phydev); + ret = phy_resume(phydev); if (ret < 0) return ret; - +no_resume: if (phydev->attached_dev && phydev->adjust_link) phy_start_machine(phydev); return 0; } -static const struct dev_pm_ops mdio_bus_phy_pm_ops = { - .suspend = mdio_bus_phy_suspend, - .resume = mdio_bus_phy_resume, - .freeze = mdio_bus_phy_suspend, - .thaw = mdio_bus_phy_resume, - .restore = mdio_bus_phy_restore, -}; - -#define MDIO_BUS_PHY_PM_OPS (&mdio_bus_phy_pm_ops) - -#else - -#define MDIO_BUS_PHY_PM_OPS NULL - -#endif /* CONFIG_PM */ +static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend, + mdio_bus_phy_resume); /** * phy_register_fixup - creates a new phy_fixup and adds it to the list @@ -554,7 +524,7 @@ static const struct device_type mdio_bus_phy_type = { .name = "PHY", .groups = phy_dev_groups, .release = phy_device_release, - .pm = MDIO_BUS_PHY_PM_OPS, + .pm = pm_ptr(&mdio_bus_phy_pm_ops), }; static int phy_request_driver_module(struct phy_device *dev, u32 phy_id) @@ -606,6 +576,7 @@ struct phy_device *phy_device_create(struct mii_bus *bus, int addr, u32 phy_id, dev->pause = 0; dev->asym_pause = 0; dev->link = 0; + dev->port = PORT_TP; dev->interface = PHY_INTERFACE_MODE_GMII; dev->autoneg = AUTONEG_ENABLE; @@ -1143,10 +1114,19 @@ int phy_init_hw(struct phy_device *phydev) if (ret < 0) return ret; - if (phydev->drv->config_init) + if (phydev->drv->config_init) { ret = phydev->drv->config_init(phydev); + if (ret < 0) + return ret; + } - return ret; + if (phydev->drv->config_intr) { + ret = phydev->drv->config_intr(phydev); + if (ret < 0) + return ret; + } + + return 0; } EXPORT_SYMBOL(phy_init_hw); @@ -1403,6 +1383,14 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phydev->state = PHY_READY; + /* Port is set to PORT_TP by default and the actual PHY driver will set + * it to different value depending on the PHY configuration. If we have + * the generic PHY driver we can't figure it out, thus set the old + * legacy PORT_MII value. + */ + if (using_genphy) + phydev->port = PORT_MII; + /* Initial carrier state is off as the phy is about to be * (re)initialized. */ diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 84f6e197f965..add9156601af 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -472,7 +472,7 @@ static void phylink_major_config(struct phylink *pl, bool restart, err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode, state->interface); if (err < 0) - phylink_err(pl, "mac_prepare failed: %pe\n", + phylink_err(pl, "mac_finish failed: %pe\n", ERR_PTR(err)); } } diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 20b91f5dfc6e..4cf874fb5c5b 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -44,6 +44,17 @@ static void sfp_quirk_2500basex(const struct sfp_eeprom_id *id, phylink_set(modes, 2500baseX_Full); } +static void sfp_quirk_ubnt_uf_instant(const struct sfp_eeprom_id *id, + unsigned long *modes) +{ + /* Ubiquiti U-Fiber Instant module claims that support all transceiver + * types including 10G Ethernet which is not truth. So clear all claimed + * modes and set only one mode which module supports: 1000baseX_Full. + */ + phylink_zero(modes); + phylink_set(modes, 1000baseX_Full); +} + static const struct sfp_quirk sfp_quirks[] = { { // Alcatel Lucent G-010S-P can operate at 2500base-X, but @@ -63,6 +74,10 @@ static const struct sfp_quirk sfp_quirks[] = { .vendor = "HUAWEI", .part = "MA5671A", .modes = sfp_quirk_2500basex, + }, { + .vendor = "UBNT", + .part = "UF-INSTANT", + .modes = sfp_quirk_ubnt_uf_instant, }, }; diff --git a/drivers/net/phy/sfp.c b/drivers/net/phy/sfp.c index 91d74c1a920a..7a680b5177f5 100644 --- a/drivers/net/phy/sfp.c +++ b/drivers/net/phy/sfp.c @@ -273,8 +273,21 @@ static const struct sff_data sff_data = { static bool sfp_module_supported(const struct sfp_eeprom_id *id) { - return id->base.phys_id == SFF8024_ID_SFP && - id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP; + if (id->base.phys_id == SFF8024_ID_SFP && + id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP) + return true; + + /* SFP GPON module Ubiquiti U-Fiber Instant has in its EEPROM stored + * phys id SFF instead of SFP. Therefore mark this module explicitly + * as supported based on vendor name and pn match. + */ + if (id->base.phys_id == SFF8024_ID_SFF_8472 && + id->base.phys_ext_id == SFP_PHYS_EXT_ID_SFP && + !memcmp(id->base.vendor_name, "UBNT ", 16) && + !memcmp(id->base.vendor_pn, "UF-INSTANT ", 16)) + return true; + + return false; } static const struct sff_data sfp_data = { @@ -336,19 +349,11 @@ static int sfp_i2c_read(struct sfp *sfp, bool a2, u8 dev_addr, void *buf, size_t len) { struct i2c_msg msgs[2]; - size_t block_size; + u8 bus_addr = a2 ? 0x51 : 0x50; + size_t block_size = sfp->i2c_block_size; size_t this_len; - u8 bus_addr; int ret; - if (a2) { - block_size = 16; - bus_addr = 0x51; - } else { - block_size = sfp->i2c_block_size; - bus_addr = 0x50; - } - msgs[0].addr = bus_addr; msgs[0].flags = 0; msgs[0].len = 1; @@ -1282,6 +1287,20 @@ static void sfp_hwmon_probe(struct work_struct *work) struct sfp *sfp = container_of(work, struct sfp, hwmon_probe.work); int err, i; + /* hwmon interface needs to access 16bit registers in atomic way to + * guarantee coherency of the diagnostic monitoring data. If it is not + * possible to guarantee coherency because EEPROM is broken in such way + * that does not support atomic 16bit read operation then we have to + * skip registration of hwmon device. + */ + if (sfp->i2c_block_size < 2) { + dev_info(sfp->dev, + "skipping hwmon device registration due to broken EEPROM\n"); + dev_info(sfp->dev, + "diagnostic EEPROM area cannot be read atomically to guarantee data coherency\n"); + return; + } + err = sfp_read(sfp, true, 0, &sfp->diag, sizeof(sfp->diag)); if (err < 0) { if (sfp->hwmon_tries--) { @@ -1642,26 +1661,30 @@ static int sfp_sm_mod_hpower(struct sfp *sfp, bool enable) return 0; } -/* Some modules (Nokia 3FE46541AA) lock up if byte 0x51 is read as a - * single read. Switch back to reading 16 byte blocks unless we have - * a CarlitoxxPro module (rebranded VSOL V2801F). Even more annoyingly, - * some VSOL V2801F have the vendor name changed to OEM. +/* GPON modules based on Realtek RTL8672 and RTL9601C chips (e.g. V-SOL + * V2801F, CarlitoxxPro CPGOS03-0490, Ubiquiti U-Fiber Instant, ...) do + * not support multibyte reads from the EEPROM. Each multi-byte read + * operation returns just one byte of EEPROM followed by zeros. There is + * no way to identify which modules are using Realtek RTL8672 and RTL9601C + * chips. Moreover every OEM of V-SOL V2801F module puts its own vendor + * name and vendor id into EEPROM, so there is even no way to detect if + * module is V-SOL V2801F. Therefore check for those zeros in the read + * data and then based on check switch to reading EEPROM to one byte + * at a time. */ -static int sfp_quirk_i2c_block_size(const struct sfp_eeprom_base *base) +static bool sfp_id_needs_byte_io(struct sfp *sfp, void *buf, size_t len) { - if (!memcmp(base->vendor_name, "VSOL ", 16)) - return 1; - if (!memcmp(base->vendor_name, "OEM ", 16) && - !memcmp(base->vendor_pn, "V2801F ", 16)) - return 1; + size_t i, block_size = sfp->i2c_block_size; - /* Some modules can't cope with long reads */ - return 16; -} + /* Already using byte IO */ + if (block_size == 1) + return false; -static void sfp_quirks_base(struct sfp *sfp, const struct sfp_eeprom_base *base) -{ - sfp->i2c_block_size = sfp_quirk_i2c_block_size(base); + for (i = 1; i < len; i += block_size) { + if (memchr_inv(buf + i, '\0', min(block_size - 1, len - i))) + return false; + } + return true; } static int sfp_cotsworks_fixup_check(struct sfp *sfp, struct sfp_eeprom_id *id) @@ -1705,11 +1728,11 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) u8 check; int ret; - /* Some modules (CarlitoxxPro CPGOS03-0490) do not support multibyte - * reads from the EEPROM, so start by reading the base identifying - * information one byte at a time. + /* Some SFP modules and also some Linux I2C drivers do not like reads + * longer than 16 bytes, so read the EEPROM in chunks of 16 bytes at + * a time. */ - sfp->i2c_block_size = 1; + sfp->i2c_block_size = 16; ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); if (ret < 0) { @@ -1723,6 +1746,33 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) return -EAGAIN; } + /* Some SFP modules (e.g. Nokia 3FE46541AA) lock up if read from + * address 0x51 is just one byte at a time. Also SFF-8472 requires + * that EEPROM supports atomic 16bit read operation for diagnostic + * fields, so do not switch to one byte reading at a time unless it + * is really required and we have no other option. + */ + if (sfp_id_needs_byte_io(sfp, &id.base, sizeof(id.base))) { + dev_info(sfp->dev, + "Detected broken RTL8672/RTL9601C emulated EEPROM\n"); + dev_info(sfp->dev, + "Switching to reading EEPROM to one byte at a time\n"); + sfp->i2c_block_size = 1; + + ret = sfp_read(sfp, false, 0, &id.base, sizeof(id.base)); + if (ret < 0) { + if (report) + dev_err(sfp->dev, "failed to read EEPROM: %d\n", + ret); + return -EAGAIN; + } + + if (ret != sizeof(id.base)) { + dev_err(sfp->dev, "EEPROM short read: %d\n", ret); + return -EAGAIN; + } + } + /* Cotsworks do not seem to update the checksums when they * do the final programming with the final module part number, * serial number and date code. @@ -1757,9 +1807,6 @@ static int sfp_sm_mod_probe(struct sfp *sfp, bool report) } } - /* Apply any early module-specific quirks */ - sfp_quirks_base(sfp, &id.base); - ret = sfp_read(sfp, false, SFP_CC_BASE + 1, &id.ext, sizeof(id.ext)); if (ret < 0) { if (report) diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c index 29a0917a81e6..f14a9d190de9 100644 --- a/drivers/net/ppp/ppp_async.c +++ b/drivers/net/ppp/ppp_async.c @@ -259,7 +259,8 @@ static int ppp_asynctty_hangup(struct tty_struct *tty) */ static ssize_t ppp_asynctty_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t count) + unsigned char *buf, size_t count, + void **cookie, unsigned long offset) { return -EAGAIN; } diff --git a/drivers/net/ppp/ppp_synctty.c b/drivers/net/ppp/ppp_synctty.c index 0f338752c38b..f774b7e52da4 100644 --- a/drivers/net/ppp/ppp_synctty.c +++ b/drivers/net/ppp/ppp_synctty.c @@ -257,7 +257,8 @@ static int ppp_sync_hangup(struct tty_struct *tty) */ static ssize_t ppp_sync_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t count) + unsigned char *buf, size_t count, + void **cookie, unsigned long offset) { return -EAGAIN; } diff --git a/drivers/net/tap.c b/drivers/net/tap.c index 1f4bdd94407a..f549d3a8e59c 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -1093,10 +1093,9 @@ static long tap_ioctl(struct file *file, unsigned int cmd, return -ENOLINK; } ret = 0; - u = tap->dev->type; + dev_get_mac_address(&sa, dev_net(tap->dev), tap->dev->name); if (copy_to_user(&ifr->ifr_name, tap->dev->name, IFNAMSIZ) || - copy_to_user(&ifr->ifr_hwaddr.sa_data, tap->dev->dev_addr, ETH_ALEN) || - put_user(u, &ifr->ifr_hwaddr.sa_family)) + copy_to_user(&ifr->ifr_hwaddr, &sa, sizeof(sa))) ret = -EFAULT; tap_put_tap_dev(tap); rtnl_unlock(); @@ -1111,7 +1110,7 @@ static long tap_ioctl(struct file *file, unsigned int cmd, rtnl_unlock(); return -ENOLINK; } - ret = dev_set_mac_address(tap->dev, &sa, NULL); + ret = dev_set_mac_address_user(tap->dev, &sa, NULL); tap_put_tap_dev(tap); rtnl_unlock(); return ret; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 978ac0981d16..5512418b7be0 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3113,15 +3113,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, case SIOCGIFHWADDR: /* Get hw address */ - memcpy(ifr.ifr_hwaddr.sa_data, tun->dev->dev_addr, ETH_ALEN); - ifr.ifr_hwaddr.sa_family = tun->dev->type; + dev_get_mac_address(&ifr.ifr_hwaddr, net, tun->dev->name); if (copy_to_user(argp, &ifr, ifreq_len)) ret = -EFAULT; break; case SIOCSIFHWADDR: /* Set hw address */ - ret = dev_set_mac_address(tun->dev, &ifr.ifr_hwaddr, NULL); + ret = dev_set_mac_address_user(tun->dev, &ifr.ifr_hwaddr, NULL); break; case TUNGETSNDBUF: diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c index 02e6bbb17b15..8d1f69dad603 100644 --- a/drivers/net/usb/cdc-phonet.c +++ b/drivers/net/usb/cdc-phonet.c @@ -387,6 +387,8 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i err = register_netdev(dev); if (err) { + /* Set disconnected flag so that disconnect() returns early. */ + pnd->disconnected = 1; usb_driver_release_interface(&usbpn_driver, data_intf); goto out; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 5a05add9b4e6..e18ded349d84 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -396,13 +396,6 @@ static ssize_t add_mux_store(struct device *d, struct device_attribute *attr, c goto err; } - /* we don't want to modify a running netdev */ - if (netif_running(dev->net)) { - netdev_err(dev->net, "Cannot change a running device\n"); - ret = -EBUSY; - goto err; - } - ret = qmimux_register_device(dev->net, mux_id); if (!ret) { info->flags |= QMI_WWAN_FLAG_MUX; @@ -432,13 +425,6 @@ static ssize_t del_mux_store(struct device *d, struct device_attribute *attr, c if (!rtnl_trylock()) return restart_syscall(); - /* we don't want to modify a running netdev */ - if (netif_running(dev->net)) { - netdev_err(dev->net, "Cannot change a running device\n"); - ret = -EBUSY; - goto err; - } - del_dev = qmimux_find_dev(dev, mux_id); if (!del_dev) { netdev_err(dev->net, "mux_id not present\n"); @@ -1235,6 +1221,7 @@ static const struct usb_device_id products[] = { {QMI_FIXED_INTF(0x19d2, 0x1255, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1256, 4)}, {QMI_FIXED_INTF(0x19d2, 0x1270, 5)}, /* ZTE MF667 */ + {QMI_FIXED_INTF(0x19d2, 0x1275, 3)}, /* ZTE P685M */ {QMI_FIXED_INTF(0x19d2, 0x1401, 2)}, {QMI_FIXED_INTF(0x19d2, 0x1402, 2)}, /* ZTE MF60 */ {QMI_FIXED_INTF(0x19d2, 0x1424, 2)}, diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 67cd6986634f..390d9e1fa7fe 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3016,29 +3016,6 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts) device_set_wakeup_enable(&tp->udev->dev, false); } -static void r8153_mac_clk_spd(struct r8152 *tp, bool enable) -{ - /* MAC clock speed down */ - if (enable) { - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, - ALDPS_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, - EEE_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN | - U1U2_SPDWN_EN | L1_SPDWN_EN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN | - TP100_SPDWN_EN | TP500_SPDWN_EN | EEE_SPDWN_EN | - TP1000_SPDWN_EN); - } else { - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); - } -} - static void r8153_u1u2en(struct r8152 *tp, bool enable) { u8 u1u2[8]; @@ -3338,11 +3315,9 @@ static void rtl8153_runtime_enable(struct r8152 *tp, bool enable) if (enable) { r8153_u1u2en(tp, false); r8153_u2p3en(tp, false); - r8153_mac_clk_spd(tp, true); rtl_runtime_suspend_enable(tp, true); } else { rtl_runtime_suspend_enable(tp, false); - r8153_mac_clk_spd(tp, false); switch (tp->version) { case RTL_VER_03: @@ -4678,7 +4653,6 @@ static void r8153_first_init(struct r8152 *tp) { u32 ocp_data; - r8153_mac_clk_spd(tp, false); rxdy_gated_en(tp, true); r8153_teredo_off(tp); @@ -4729,8 +4703,6 @@ static void r8153_enter_oob(struct r8152 *tp) { u32 ocp_data; - r8153_mac_clk_spd(tp, true); - ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); @@ -5456,10 +5428,15 @@ static void r8153_init(struct r8152 *tp) ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001); + /* MAC clock speed down */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); + r8153_power_cut_en(tp, false); rtl_runtime_suspend_enable(tp, false); r8153_u1u2en(tp, true); - r8153_mac_clk_spd(tp, false); usb_enable_lpm(tp->udev); ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6); @@ -6525,7 +6502,10 @@ static int rtl_ops_init(struct r8152 *tp) ops->in_nway = rtl8153_in_nway; ops->hw_phy_cfg = r8153_hw_phy_cfg; ops->autosuspend_en = rtl8153_runtime_enable; - tp->rx_buf_sz = 32 * 1024; + if (tp->udev->speed < USB_SPEED_SUPER) + tp->rx_buf_sz = 16 * 1024; + else + tp->rx_buf_sz = 32 * 1024; tp->eee_en = true; tp->eee_adv = MDIO_EEE_1000T | MDIO_EEE_100TX; break; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 02bfcdf50a7a..36abe756282e 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -301,8 +301,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) if (rxq < rcv->real_num_rx_queues) { rq = &rcv_priv->rq[rxq]; rcv_xdp = rcu_access_pointer(rq->xdp_prog); - if (rcv_xdp) - skb_record_rx_queue(skb, rxq); + skb_record_rx_queue(skb, rxq); } skb_tx_timestamp(skb); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a8ad710629e6..0842371eca3d 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4725,7 +4725,6 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) struct vxlan_net *vn = net_generic(net, vxlan_net_id); struct vxlan_dev *vxlan, *next; struct net_device *dev, *aux; - unsigned int h; for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &vxlan_link_ops) @@ -4739,14 +4738,13 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) unregister_netdevice_queue(vxlan->dev, head); } - for (h = 0; h < PORT_HASH_SIZE; ++h) - WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); } static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) { struct net *net; LIST_HEAD(list); + unsigned int h; rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { @@ -4759,6 +4757,13 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list) unregister_netdevice_many(&list); rtnl_unlock(); + + list_for_each_entry(net, net_list, exit_list) { + struct vxlan_net *vn = net_generic(net, vxlan_net_id); + + for (h = 0; h < PORT_HASH_SIZE; ++h) + WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h])); + } } static struct pernet_operations vxlan_net_ops = { diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index dca97cd7c4e7..7eac6a3e1cde 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -204,14 +204,18 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) priv->rx_skbuff = kcalloc(priv->rx_ring_size, sizeof(*priv->rx_skbuff), GFP_KERNEL); - if (!priv->rx_skbuff) + if (!priv->rx_skbuff) { + ret = -ENOMEM; goto free_ucc_pram; + } priv->tx_skbuff = kcalloc(priv->tx_ring_size, sizeof(*priv->tx_skbuff), GFP_KERNEL); - if (!priv->tx_skbuff) + if (!priv->tx_skbuff) { + ret = -ENOMEM; goto free_rx_skbuff; + } priv->skb_curtx = 0; priv->skb_dirtytx = 0; diff --git a/drivers/net/wan/hdlc_x25.c b/drivers/net/wan/hdlc_x25.c index 4aaa6388b9ee..5a6a945f6c81 100644 --- a/drivers/net/wan/hdlc_x25.c +++ b/drivers/net/wan/hdlc_x25.c @@ -23,6 +23,8 @@ struct x25_state { x25_hdlc_proto settings; + bool up; + spinlock_t up_lock; /* Protects "up" */ }; static int x25_ioctl(struct net_device *dev, struct ifreq *ifr); @@ -104,6 +106,8 @@ static void x25_data_transmit(struct net_device *dev, struct sk_buff *skb) static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev) { + hdlc_device *hdlc = dev_to_hdlc(dev); + struct x25_state *x25st = state(hdlc); int result; /* There should be a pseudo header of 1 byte added by upper layers. @@ -114,11 +118,19 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } + spin_lock_bh(&x25st->up_lock); + if (!x25st->up) { + spin_unlock_bh(&x25st->up_lock); + kfree_skb(skb); + return NETDEV_TX_OK; + } + switch (skb->data[0]) { case X25_IFACE_DATA: /* Data to be transmitted */ skb_pull(skb, 1); if ((result = lapb_data_request(dev, skb)) != LAPB_OK) dev_kfree_skb(skb); + spin_unlock_bh(&x25st->up_lock); return NETDEV_TX_OK; case X25_IFACE_CONNECT: @@ -147,6 +159,7 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev) break; } + spin_unlock_bh(&x25st->up_lock); dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -164,6 +177,7 @@ static int x25_open(struct net_device *dev) .data_transmit = x25_data_transmit, }; hdlc_device *hdlc = dev_to_hdlc(dev); + struct x25_state *x25st = state(hdlc); struct lapb_parms_struct params; int result; @@ -190,6 +204,10 @@ static int x25_open(struct net_device *dev) if (result != LAPB_OK) return -EINVAL; + spin_lock_bh(&x25st->up_lock); + x25st->up = true; + spin_unlock_bh(&x25st->up_lock); + return 0; } @@ -197,6 +215,13 @@ static int x25_open(struct net_device *dev) static void x25_close(struct net_device *dev) { + hdlc_device *hdlc = dev_to_hdlc(dev); + struct x25_state *x25st = state(hdlc); + + spin_lock_bh(&x25st->up_lock); + x25st->up = false; + spin_unlock_bh(&x25st->up_lock); + lapb_unregister(dev); } @@ -205,15 +230,28 @@ static void x25_close(struct net_device *dev) static int x25_rx(struct sk_buff *skb) { struct net_device *dev = skb->dev; + hdlc_device *hdlc = dev_to_hdlc(dev); + struct x25_state *x25st = state(hdlc); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { dev->stats.rx_dropped++; return NET_RX_DROP; } - if (lapb_data_received(dev, skb) == LAPB_OK) + spin_lock_bh(&x25st->up_lock); + if (!x25st->up) { + spin_unlock_bh(&x25st->up_lock); + kfree_skb(skb); + dev->stats.rx_dropped++; + return NET_RX_DROP; + } + + if (lapb_data_received(dev, skb) == LAPB_OK) { + spin_unlock_bh(&x25st->up_lock); return NET_RX_SUCCESS; + } + spin_unlock_bh(&x25st->up_lock); dev->stats.rx_errors++; dev_kfree_skb_any(skb); return NET_RX_DROP; @@ -298,6 +336,8 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr) return result; memcpy(&state(hdlc)->settings, &new_settings, size); + state(hdlc)->up = false; + spin_lock_init(&state(hdlc)->up_lock); /* There's no header_ops so hard_header_len should be 0. */ dev->hard_header_len = 0; diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c index 605fe555e157..c3372498f4f1 100644 --- a/drivers/net/wan/lapbether.c +++ b/drivers/net/wan/lapbether.c @@ -292,7 +292,6 @@ static int lapbeth_open(struct net_device *dev) return -ENODEV; } - netif_start_queue(dev); return 0; } @@ -300,8 +299,6 @@ static int lapbeth_close(struct net_device *dev) { int err; - netif_stop_queue(dev); - if ((err = lapb_unregister(dev)) != LAPB_OK) pr_err("lapb_unregister error: %d\n", err); diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c index a3ed49cd95c3..b4d84c881c7d 100644 --- a/drivers/net/wireguard/device.c +++ b/drivers/net/wireguard/device.c @@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) else if (skb->protocol == htons(ETH_P_IPV6)) net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n", dev->name, &ipv6_hdr(skb)->daddr); - goto err; + goto err_icmp; } family = READ_ONCE(peer->endpoint.addr.sa_family); @@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev) err_peer: wg_peer_put(peer); -err: - ++dev->stats.tx_errors; +err_icmp: if (skb->protocol == htons(ETH_P_IP)) icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0); else if (skb->protocol == htons(ETH_P_IPV6)) icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); +err: + ++dev->stats.tx_errors; kfree_skb(skb); return ret; } @@ -234,8 +235,8 @@ static void wg_destruct(struct net_device *dev) destroy_workqueue(wg->handshake_receive_wq); destroy_workqueue(wg->handshake_send_wq); destroy_workqueue(wg->packet_crypt_wq); - wg_packet_queue_free(&wg->decrypt_queue, true); - wg_packet_queue_free(&wg->encrypt_queue, true); + wg_packet_queue_free(&wg->decrypt_queue); + wg_packet_queue_free(&wg->encrypt_queue); rcu_barrier(); /* Wait for all the peers to be actually freed. */ wg_ratelimiter_uninit(); memzero_explicit(&wg->static_identity, sizeof(wg->static_identity)); @@ -337,12 +338,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, goto err_destroy_handshake_send; ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker, - true, MAX_QUEUED_PACKETS); + MAX_QUEUED_PACKETS); if (ret < 0) goto err_destroy_packet_crypt; ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker, - true, MAX_QUEUED_PACKETS); + MAX_QUEUED_PACKETS); if (ret < 0) goto err_free_encrypt_queue; @@ -367,9 +368,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev, err_uninit_ratelimiter: wg_ratelimiter_uninit(); err_free_decrypt_queue: - wg_packet_queue_free(&wg->decrypt_queue, true); + wg_packet_queue_free(&wg->decrypt_queue); err_free_encrypt_queue: - wg_packet_queue_free(&wg->encrypt_queue, true); + wg_packet_queue_free(&wg->encrypt_queue); err_destroy_packet_crypt: destroy_workqueue(wg->packet_crypt_wq); err_destroy_handshake_send: diff --git a/drivers/net/wireguard/device.h b/drivers/net/wireguard/device.h index 4d0144e16947..854bc3d97150 100644 --- a/drivers/net/wireguard/device.h +++ b/drivers/net/wireguard/device.h @@ -27,13 +27,14 @@ struct multicore_worker { struct crypt_queue { struct ptr_ring ring; - union { - struct { - struct multicore_worker __percpu *worker; - int last_cpu; - }; - struct work_struct work; - }; + struct multicore_worker __percpu *worker; + int last_cpu; +}; + +struct prev_queue { + struct sk_buff *head, *tail, *peeked; + struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff. + atomic_t count; }; struct wg_device { diff --git a/drivers/net/wireguard/peer.c b/drivers/net/wireguard/peer.c index b3b6370e6b95..cd5cb0292cb6 100644 --- a/drivers/net/wireguard/peer.c +++ b/drivers/net/wireguard/peer.c @@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, peer = kzalloc(sizeof(*peer), GFP_KERNEL); if (unlikely(!peer)) return ERR_PTR(ret); - peer->device = wg; + if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) + goto err; + peer->device = wg; wg_noise_handshake_init(&peer->handshake, &wg->static_identity, public_key, preshared_key, peer); - if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL)) - goto err_1; - if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false, - MAX_QUEUED_PACKETS)) - goto err_2; - if (wg_packet_queue_init(&peer->rx_queue, NULL, false, - MAX_QUEUED_PACKETS)) - goto err_3; - peer->internal_id = atomic64_inc_return(&peer_counter); peer->serial_work_cpu = nr_cpumask_bits; wg_cookie_init(&peer->latest_cookie); wg_timers_init(peer); wg_cookie_checker_precompute_peer_keys(peer); spin_lock_init(&peer->keypairs.keypair_update_lock); - INIT_WORK(&peer->transmit_handshake_work, - wg_packet_handshake_send_worker); + INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker); + INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker); + wg_prev_queue_init(&peer->tx_queue); + wg_prev_queue_init(&peer->rx_queue); rwlock_init(&peer->endpoint_lock); kref_init(&peer->refcount); skb_queue_head_init(&peer->staged_packet_queue); @@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg, pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id); return peer; -err_3: - wg_packet_queue_free(&peer->tx_queue, false); -err_2: - dst_cache_destroy(&peer->endpoint_cache); -err_1: +err: kfree(peer); return ERR_PTR(ret); } @@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head *rcu) struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu); dst_cache_destroy(&peer->endpoint_cache); - wg_packet_queue_free(&peer->rx_queue, false); - wg_packet_queue_free(&peer->tx_queue, false); + WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue)); /* The final zeroing takes care of clearing any remaining handshake key * material and other potentially sensitive information. diff --git a/drivers/net/wireguard/peer.h b/drivers/net/wireguard/peer.h index 23af40922997..0809cda08bfa 100644 --- a/drivers/net/wireguard/peer.h +++ b/drivers/net/wireguard/peer.h @@ -36,7 +36,7 @@ struct endpoint { struct wg_peer { struct wg_device *device; - struct crypt_queue tx_queue, rx_queue; + struct prev_queue tx_queue, rx_queue; struct sk_buff_head staged_packet_queue; int serial_work_cpu; struct noise_keypairs keypairs; @@ -45,7 +45,7 @@ struct wg_peer { rwlock_t endpoint_lock; struct noise_handshake handshake; atomic64_t last_sent_handshake; - struct work_struct transmit_handshake_work, clear_peer_work; + struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work; struct cookie latest_cookie; struct hlist_node pubkey_hash; u64 rx_bytes, tx_bytes; diff --git a/drivers/net/wireguard/queueing.c b/drivers/net/wireguard/queueing.c index 71b8e80b58e1..48e7b982a307 100644 --- a/drivers/net/wireguard/queueing.c +++ b/drivers/net/wireguard/queueing.c @@ -9,8 +9,7 @@ struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) { int cpu; - struct multicore_worker __percpu *worker = - alloc_percpu(struct multicore_worker); + struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker); if (!worker) return NULL; @@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr) } int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, - bool multicore, unsigned int len) + unsigned int len) { int ret; @@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL); if (ret) return ret; - if (function) { - if (multicore) { - queue->worker = wg_packet_percpu_multicore_worker_alloc( - function, queue); - if (!queue->worker) { - ptr_ring_cleanup(&queue->ring, NULL); - return -ENOMEM; - } - } else { - INIT_WORK(&queue->work, function); - } + queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue); + if (!queue->worker) { + ptr_ring_cleanup(&queue->ring, NULL); + return -ENOMEM; } return 0; } -void wg_packet_queue_free(struct crypt_queue *queue, bool multicore) +void wg_packet_queue_free(struct crypt_queue *queue) { - if (multicore) - free_percpu(queue->worker); + free_percpu(queue->worker); WARN_ON(!__ptr_ring_empty(&queue->ring)); ptr_ring_cleanup(&queue->ring, NULL); } + +#define NEXT(skb) ((skb)->prev) +#define STUB(queue) ((struct sk_buff *)&queue->empty) + +void wg_prev_queue_init(struct prev_queue *queue) +{ + NEXT(STUB(queue)) = NULL; + queue->head = queue->tail = STUB(queue); + queue->peeked = NULL; + atomic_set(&queue->count, 0); + BUILD_BUG_ON( + offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) - + offsetof(struct prev_queue, empty) || + offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) - + offsetof(struct prev_queue, empty)); +} + +static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + WRITE_ONCE(NEXT(skb), NULL); + WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb); +} + +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb) +{ + if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS)) + return false; + __wg_prev_queue_enqueue(queue, skb); + return true; +} + +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue) +{ + struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail)); + + if (tail == STUB(queue)) { + if (!next) + return NULL; + queue->tail = next; + tail = next; + next = smp_load_acquire(&NEXT(next)); + } + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + if (tail != READ_ONCE(queue->head)) + return NULL; + __wg_prev_queue_enqueue(queue, STUB(queue)); + next = smp_load_acquire(&NEXT(tail)); + if (next) { + queue->tail = next; + atomic_dec(&queue->count); + return tail; + } + return NULL; +} + +#undef NEXT +#undef STUB diff --git a/drivers/net/wireguard/queueing.h b/drivers/net/wireguard/queueing.h index dfb674e03076..4ef2944a68bc 100644 --- a/drivers/net/wireguard/queueing.h +++ b/drivers/net/wireguard/queueing.h @@ -17,12 +17,13 @@ struct wg_device; struct wg_peer; struct multicore_worker; struct crypt_queue; +struct prev_queue; struct sk_buff; /* queueing.c APIs: */ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function, - bool multicore, unsigned int len); -void wg_packet_queue_free(struct crypt_queue *queue, bool multicore); + unsigned int len); +void wg_packet_queue_free(struct crypt_queue *queue); struct multicore_worker __percpu * wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr); @@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online(int *next) return cpu; } +void wg_prev_queue_init(struct prev_queue *queue); + +/* Multi producer */ +bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb); + +/* Single consumer */ +struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue); + +/* Single consumer */ +static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue) +{ + if (queue->peeked) + return queue->peeked; + queue->peeked = wg_prev_queue_dequeue(queue); + return queue->peeked; +} + +/* Single consumer */ +static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue) +{ + queue->peeked = NULL; +} + static inline int wg_queue_enqueue_per_device_and_peer( - struct crypt_queue *device_queue, struct crypt_queue *peer_queue, + struct crypt_queue *device_queue, struct prev_queue *peer_queue, struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu) { int cpu; @@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_device_and_peer( /* We first queue this up for the peer ingestion, but the consumer * will wait for the state to change to CRYPTED or DEAD before. */ - if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb))) + if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb))) return -ENOSPC; + /* Then we queue it up in the device queue, which consumes the * packet as soon as it can. */ @@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_device_and_peer( return 0; } -static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, - struct sk_buff *skb, - enum packet_state state) +static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state) { /* We take a reference, because as soon as we call atomic_set, the * peer can be freed from below us. @@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue, struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb)); atomic_set_release(&PACKET_CB(skb)->state, state); - queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, - peer->internal_id), - peer->device->packet_crypt_wq, &queue->work); + queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id), + peer->device->packet_crypt_wq, &peer->transmit_packet_work); wg_peer_put(peer); } -static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb, - enum packet_state state) +static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state) { /* We take a reference, because as soon as we call atomic_set, the * peer can be freed from below us. diff --git a/drivers/net/wireguard/receive.c b/drivers/net/wireguard/receive.c index 2c9551ea6dc7..7dc84bcca261 100644 --- a/drivers/net/wireguard/receive.c +++ b/drivers/net/wireguard/receive.c @@ -444,7 +444,6 @@ static void wg_packet_consume_data_done(struct wg_peer *peer, int wg_packet_rx_poll(struct napi_struct *napi, int budget) { struct wg_peer *peer = container_of(napi, struct wg_peer, napi); - struct crypt_queue *queue = &peer->rx_queue; struct noise_keypair *keypair; struct endpoint endpoint; enum packet_state state; @@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget) if (unlikely(budget <= 0)) return 0; - while ((skb = __ptr_ring_peek(&queue->ring)) != NULL && + while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL && (state = atomic_read_acquire(&PACKET_CB(skb)->state)) != PACKET_STATE_UNCRYPTED) { - __ptr_ring_discard_one(&queue->ring); - peer = PACKET_PEER(skb); + wg_prev_queue_drop_peeked(&peer->rx_queue); keypair = PACKET_CB(skb)->keypair; free = true; @@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct work_struct *work) enum packet_state state = likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ? PACKET_STATE_CRYPTED : PACKET_STATE_DEAD; - wg_queue_enqueue_per_peer_napi(skb, state); + wg_queue_enqueue_per_peer_rx(skb, state); if (need_resched()) cond_resched(); } @@ -531,12 +529,10 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb) if (unlikely(READ_ONCE(peer->is_dead))) goto err; - ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, - &peer->rx_queue, skb, - wg->packet_crypt_wq, - &wg->decrypt_queue.last_cpu); + ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb, + wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu); if (unlikely(ret == -EPIPE)) - wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD); + wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD); if (likely(!ret || ret == -EPIPE)) { rcu_read_unlock_bh(); return; diff --git a/drivers/net/wireguard/send.c b/drivers/net/wireguard/send.c index f74b9341ab0f..5368f7c35b4b 100644 --- a/drivers/net/wireguard/send.c +++ b/drivers/net/wireguard/send.c @@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer) wg_packet_send_staged_packets(peer); } -static void wg_packet_create_data_done(struct sk_buff *first, - struct wg_peer *peer) +static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first) { struct sk_buff *skb, *next; bool is_keepalive, data_sent = false; @@ -262,22 +261,19 @@ static void wg_packet_create_data_done(struct sk_buff *first, void wg_packet_tx_worker(struct work_struct *work) { - struct crypt_queue *queue = container_of(work, struct crypt_queue, - work); + struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work); struct noise_keypair *keypair; enum packet_state state; struct sk_buff *first; - struct wg_peer *peer; - while ((first = __ptr_ring_peek(&queue->ring)) != NULL && + while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL && (state = atomic_read_acquire(&PACKET_CB(first)->state)) != PACKET_STATE_UNCRYPTED) { - __ptr_ring_discard_one(&queue->ring); - peer = PACKET_PEER(first); + wg_prev_queue_drop_peeked(&peer->tx_queue); keypair = PACKET_CB(first)->keypair; if (likely(state == PACKET_STATE_CRYPTED)) - wg_packet_create_data_done(first, peer); + wg_packet_create_data_done(peer, first); else kfree_skb_list(first); @@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct work_struct *work) break; } } - wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first, - state); + wg_queue_enqueue_per_peer_tx(first, state); if (need_resched()) cond_resched(); } } -static void wg_packet_create_data(struct sk_buff *first) +static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first) { - struct wg_peer *peer = PACKET_PEER(first); struct wg_device *wg = peer->device; int ret = -EINVAL; @@ -323,13 +317,10 @@ static void wg_packet_create_data(struct sk_buff *first) if (unlikely(READ_ONCE(peer->is_dead))) goto err; - ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, - &peer->tx_queue, first, - wg->packet_crypt_wq, - &wg->encrypt_queue.last_cpu); + ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first, + wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu); if (unlikely(ret == -EPIPE)) - wg_queue_enqueue_per_peer(&peer->tx_queue, first, - PACKET_STATE_DEAD); + wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD); err: rcu_read_unlock_bh(); if (likely(!ret || ret == -EPIPE)) @@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer) packets.prev->next = NULL; wg_peer_get(keypair->entry.peer); PACKET_CB(packets.next)->keypair = keypair; - wg_packet_create_data(packets.next); + wg_packet_create_data(peer, packets.next); return; out_invalid: diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index 05a61975c83f..869524852fba 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -626,7 +626,7 @@ static int ath10k_ahb_hif_start(struct ath10k *ar) { ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot ahb hif start\n"); - napi_enable(&ar->napi); + ath10k_core_napi_enable(ar); ath10k_ce_enable_interrupts(ar); ath10k_pci_enable_legacy_irq(ar); @@ -644,8 +644,7 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) ath10k_ahb_irq_disable(ar); synchronize_irq(ar_ahb->irq); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); + ath10k_core_napi_sync_disable(ar); ath10k_pci_flush(ar); } diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index eeb6ff6aa2e1..a419ec7130f9 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -2305,6 +2305,31 @@ void ath10k_core_start_recovery(struct ath10k *ar) } EXPORT_SYMBOL(ath10k_core_start_recovery); +void ath10k_core_napi_enable(struct ath10k *ar) +{ + lockdep_assert_held(&ar->conf_mutex); + + if (test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) + return; + + napi_enable(&ar->napi); + set_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); +} +EXPORT_SYMBOL(ath10k_core_napi_enable); + +void ath10k_core_napi_sync_disable(struct ath10k *ar) +{ + lockdep_assert_held(&ar->conf_mutex); + + if (!test_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags)) + return; + + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); + clear_bit(ATH10K_FLAG_NAPI_ENABLED, &ar->dev_flags); +} +EXPORT_SYMBOL(ath10k_core_napi_sync_disable); + static void ath10k_core_restart(struct work_struct *work) { struct ath10k *ar = container_of(work, struct ath10k, restart_work); diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 51f7e960e297..f4be6bfb2539 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -868,6 +868,9 @@ enum ath10k_dev_flags { /* Indicates that ath10k device is during recovery process and not complete */ ATH10K_FLAG_RESTARTING, + + /* protected by conf_mutex */ + ATH10K_FLAG_NAPI_ENABLED, }; enum ath10k_cal_mode { @@ -1308,6 +1311,8 @@ static inline bool ath10k_peer_stats_enabled(struct ath10k *ar) extern unsigned long ath10k_coredump_mask; +void ath10k_core_napi_sync_disable(struct ath10k *ar); +void ath10k_core_napi_enable(struct ath10k *ar); struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, enum ath10k_bus bus, enum ath10k_hw_rev hw_rev, diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 7d98250380ec..9a56a0a5f85d 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -3763,23 +3763,16 @@ bool ath10k_mac_tx_frm_has_freq(struct ath10k *ar) static int ath10k_mac_tx_wmi_mgmt(struct ath10k *ar, struct sk_buff *skb) { struct sk_buff_head *q = &ar->wmi_mgmt_tx_queue; - int ret = 0; - - spin_lock_bh(&ar->data_lock); - if (skb_queue_len(q) == ATH10K_MAX_NUM_MGMT_PENDING) { + if (skb_queue_len_lockless(q) >= ATH10K_MAX_NUM_MGMT_PENDING) { ath10k_warn(ar, "wmi mgmt tx queue is full\n"); - ret = -ENOSPC; - goto unlock; + return -ENOSPC; } - __skb_queue_tail(q, skb); + skb_queue_tail(q, skb); ieee80211_queue_work(ar->hw, &ar->wmi_mgmt_tx_work); -unlock: - spin_unlock_bh(&ar->data_lock); - - return ret; + return 0; } static enum ath10k_mac_tx_path @@ -9117,7 +9110,9 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw, if (!ath10k_peer_stats_enabled(ar)) return; + mutex_lock(&ar->conf_mutex); ath10k_debug_fw_stats_request(ar); + mutex_unlock(&ar->conf_mutex); sinfo->rx_duration = arsta->rx_duration; sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 2328df09875c..e7fde635e0ee 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1958,7 +1958,7 @@ static int ath10k_pci_hif_start(struct ath10k *ar) ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif start\n"); - napi_enable(&ar->napi); + ath10k_core_napi_enable(ar); ath10k_pci_irq_enable(ar); ath10k_pci_rx_post(ar); @@ -2075,8 +2075,9 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_pci_irq_disable(ar); ath10k_pci_irq_sync(ar); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); + + ath10k_core_napi_sync_disable(ar); + cancel_work_sync(&ar_pci->dump_work); /* Most likely the device has HTT Rx ring configured. The only way to diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c index c415090d1f37..b746052737e0 100644 --- a/drivers/net/wireless/ath/ath10k/sdio.c +++ b/drivers/net/wireless/ath/ath10k/sdio.c @@ -1859,7 +1859,7 @@ static int ath10k_sdio_hif_start(struct ath10k *ar) struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar); int ret; - napi_enable(&ar->napi); + ath10k_core_napi_enable(ar); /* Sleep 20 ms before HIF interrupts are disabled. * This will give target plenty of time to process the BMI done @@ -1992,8 +1992,7 @@ static void ath10k_sdio_hif_stop(struct ath10k *ar) spin_unlock_bh(&ar_sdio->wr_async_lock); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); + ath10k_core_napi_sync_disable(ar); } #ifdef CONFIG_PM diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c index bf9a8cb713dc..af7ecef6bcde 100644 --- a/drivers/net/wireless/ath/ath10k/snoc.c +++ b/drivers/net/wireless/ath/ath10k/snoc.c @@ -915,8 +915,7 @@ static void ath10k_snoc_hif_stop(struct ath10k *ar) if (!test_bit(ATH10K_FLAG_CRASH_FLUSH, &ar->dev_flags)) ath10k_snoc_irq_disable(ar); - napi_synchronize(&ar->napi); - napi_disable(&ar->napi); + ath10k_core_napi_sync_disable(ar); ath10k_snoc_buffer_cleanup(ar); ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot hif stop\n"); } @@ -926,7 +925,8 @@ static int ath10k_snoc_hif_start(struct ath10k *ar) struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar); bitmap_clear(ar_snoc->pending_ce_irqs, 0, CE_COUNT_MAX); - napi_enable(&ar->napi); + + ath10k_core_napi_enable(ar); ath10k_snoc_irq_enable(ar); ath10k_snoc_rx_post(ar); @@ -1045,12 +1045,13 @@ static int ath10k_snoc_hif_power_up(struct ath10k *ar, ret = ath10k_snoc_init_pipes(ar); if (ret) { ath10k_err(ar, "failed to initialize CE: %d\n", ret); - goto err_wlan_enable; + goto err_free_rri; } return 0; -err_wlan_enable: +err_free_rri: + ath10k_ce_free_rri(ar); ath10k_snoc_wlan_disable(ar); return ret; diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index 7b5834157fe5..e6135795719a 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -240,8 +240,10 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 __le32_to_cpu(stat->last_tx_rate_code), __le32_to_cpu(stat->last_tx_bitrate_kbps)); + rcu_read_lock(); sta = ieee80211_find_sta_by_ifaddr(ar->hw, stat->peer_macaddr.addr, NULL); if (!sta) { + rcu_read_unlock(); ath10k_warn(ar, "not found station for peer stats\n"); return -EINVAL; } @@ -251,6 +253,7 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 arsta->rx_bitrate_kbps = __le32_to_cpu(stat->last_rx_bitrate_kbps); arsta->tx_rate_code = __le32_to_cpu(stat->last_tx_rate_code); arsta->tx_bitrate_kbps = __le32_to_cpu(stat->last_tx_bitrate_kbps); + rcu_read_unlock(); return 0; } diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c index c1608f64ea95..54bdef33f3f8 100644 --- a/drivers/net/wireless/ath/ath11k/mac.c +++ b/drivers/net/wireless/ath/ath11k/mac.c @@ -4248,11 +4248,6 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw) /* Configure the hash seed for hash based reo dest ring selection */ ath11k_wmi_pdev_lro_cfg(ar, ar->pdev->pdev_id); - mutex_unlock(&ar->conf_mutex); - - rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], - &ab->pdevs[ar->pdev_idx]); - /* allow device to enter IMPS */ if (ab->hw_params.idle_ps) { ret = ath11k_wmi_pdev_set_param(ar, WMI_PDEV_PARAM_IDLE_PS_CONFIG, @@ -4262,6 +4257,12 @@ static int ath11k_mac_op_start(struct ieee80211_hw *hw) goto err; } } + + mutex_unlock(&ar->conf_mutex); + + rcu_assign_pointer(ab->pdevs_active[ar->pdev_idx], + &ab->pdevs[ar->pdev_idx]); + return 0; err: @@ -5298,8 +5299,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, } if (ab->hw_params.vdev_start_delay && - (arvif->vdev_type == WMI_VDEV_TYPE_AP || - arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) { + arvif->vdev_type != WMI_VDEV_TYPE_AP && + arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) { param.vdev_id = arvif->vdev_id; param.peer_type = WMI_PEER_TYPE_DEFAULT; param.peer_addr = ar->mac_addr; diff --git a/drivers/net/wireless/ath/ath9k/ath9k.h b/drivers/net/wireless/ath/ath9k/ath9k.h index 13b4f5f50f8a..ef6f5ea06c1f 100644 --- a/drivers/net/wireless/ath/ath9k/ath9k.h +++ b/drivers/net/wireless/ath/ath9k/ath9k.h @@ -177,7 +177,8 @@ struct ath_frame_info { s8 txq; u8 keyix; u8 rtscts_rate; - u8 retries : 7; + u8 retries : 6; + u8 dyn_smps : 1; u8 baw_tracked : 1; u8 tx_power; enum ath9k_key_type keytype:2; diff --git a/drivers/net/wireless/ath/ath9k/debug.c b/drivers/net/wireless/ath/ath9k/debug.c index 017a43bc400c..4c81b1d7f417 100644 --- a/drivers/net/wireless/ath/ath9k/debug.c +++ b/drivers/net/wireless/ath/ath9k/debug.c @@ -1223,8 +1223,11 @@ static ssize_t write_file_nf_override(struct file *file, ah->nf_override = val; - if (ah->curchan) + if (ah->curchan) { + ath9k_ps_wakeup(sc); ath9k_hw_loadnf(ah, ah->curchan); + ath9k_ps_restore(sc); + } return count; } diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index e60d4737fc6e..5691bd6eb82c 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -1271,6 +1271,11 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf, is_40, is_sgi, is_sp); if (rix < 8 && (tx_info->flags & IEEE80211_TX_CTL_STBC)) info->rates[i].RateFlags |= ATH9K_RATESERIES_STBC; + if (rix >= 8 && fi->dyn_smps) { + info->rates[i].RateFlags |= + ATH9K_RATESERIES_RTS_CTS; + info->flags |= ATH9K_TXDESC_CTSENA; + } info->txpower[i] = ath_get_rate_txpower(sc, bf, rix, is_40, false); @@ -2114,6 +2119,7 @@ static void setup_frame_info(struct ieee80211_hw *hw, fi->keyix = an->ps_key; else fi->keyix = ATH9K_TXKEYIX_INVALID; + fi->dyn_smps = sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC; fi->keytype = keytype; fi->framelen = framelen; fi->tx_power = txpower; diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c index b669dff24b6e..665b737fbb0d 100644 --- a/drivers/net/wireless/broadcom/b43/phy_n.c +++ b/drivers/net/wireless/broadcom/b43/phy_n.c @@ -5311,7 +5311,7 @@ static void b43_nphy_restore_cal(struct b43_wldev *dev) for (i = 0; i < 4; i++) { if (dev->phy.rev >= 3) - table[i] = coef[i]; + coef[i] = table[i]; else coef[i] = 0; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c index 4aa2561934d7..6d5188b78f2d 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/dmi.c @@ -40,6 +40,18 @@ static const struct brcmf_dmi_data pov_tab_p1006w_data = { BRCM_CC_43340_CHIP_ID, 2, "pov-tab-p1006w-data" }; +static const struct brcmf_dmi_data predia_basic_data = { + BRCM_CC_43341_CHIP_ID, 2, "predia-basic" +}; + +/* Note the Voyo winpad A15 tablet uses the same Ampak AP6330 module, with the + * exact same nvram file as the Prowise-PT301 tablet. Since the nvram for the + * Prowise-PT301 is already in linux-firmware we just point to that here. + */ +static const struct brcmf_dmi_data voyo_winpad_a15_data = { + BRCM_CC_4330_CHIP_ID, 4, "Prowise-PT301" +}; + static const struct dmi_system_id dmi_platform_data[] = { { /* ACEPC T8 Cherry Trail Z8350 mini PC */ @@ -111,6 +123,26 @@ static const struct dmi_system_id dmi_platform_data[] = { }, .driver_data = (void *)&pov_tab_p1006w_data, }, + { + /* Predia Basic tablet (+ with keyboard dock) */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), + DMI_MATCH(DMI_PRODUCT_NAME, "CherryTrail"), + /* Mx.WT107.KUBNGEA02 with the version-nr dropped */ + DMI_MATCH(DMI_BIOS_VERSION, "Mx.WT107.KUBNGEA"), + }, + .driver_data = (void *)&predia_basic_data, + }, + { + /* Voyo winpad A15 tablet */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "11/20/2014"), + }, + .driver_data = (void *)&voyo_winpad_a15_data, + }, {} }; diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c index 8280092066e7..503d0feb6bbc 100644 --- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c +++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c @@ -635,6 +635,24 @@ const struct iwl_cfg iwl_cfg_snj_a0_mr_a0 = { .num_rbds = IWL_NUM_RBDS_AX210_HE, }; +const struct iwl_cfg iwl_cfg_so_a0_hr_a0 = { + .fw_name_pre = IWL_SO_A_HR_B_FW_PRE, + IWL_DEVICE_AX210, + .num_rbds = IWL_NUM_RBDS_AX210_HE, +}; + +const struct iwl_cfg iwl_cfg_quz_a0_hr_b0 = { + .fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE, + IWL_DEVICE_22500, + /* + * This device doesn't support receiving BlockAck with a large bitmap + * so we need to restrict the size of transmitted aggregation to the + * HT size; mac80211 would otherwise pick the HE max (256) by default. + */ + .max_tx_agg_size = IEEE80211_MAX_AMPDU_BUF_HT, + .num_rbds = IWL_NUM_RBDS_22000_HE, +}; + MODULE_FIRMWARE(IWL_QU_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QNJ_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c index 895a907acdf0..37ce4fe136c5 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c +++ b/drivers/net/wireless/intel/iwlwifi/fw/pnvm.c @@ -198,14 +198,14 @@ static int iwl_pnvm_parse(struct iwl_trans *trans, const u8 *data, le32_to_cpu(sku_id->data[1]), le32_to_cpu(sku_id->data[2])); + data += sizeof(*tlv) + ALIGN(tlv_len, 4); + len -= ALIGN(tlv_len, 4); + if (trans->sku_id[0] == le32_to_cpu(sku_id->data[0]) && trans->sku_id[1] == le32_to_cpu(sku_id->data[1]) && trans->sku_id[2] == le32_to_cpu(sku_id->data[2])) { int ret; - data += sizeof(*tlv) + ALIGN(tlv_len, 4); - len -= ALIGN(tlv_len, 4); - ret = iwl_pnvm_handle_section(trans, data, len); if (!ret) return 0; @@ -227,6 +227,7 @@ int iwl_pnvm_load(struct iwl_trans *trans, struct iwl_notification_wait pnvm_wait; static const u16 ntf_cmds[] = { WIDE_ID(REGULATORY_AND_NVM_GROUP, PNVM_INIT_COMPLETE_NTFY) }; + int ret; /* if the SKU_ID is empty, there's nothing to do */ if (!trans->sku_id[0] && !trans->sku_id[1] && !trans->sku_id[2]) @@ -236,7 +237,6 @@ int iwl_pnvm_load(struct iwl_trans *trans, if (!trans->pnvm_loaded) { const struct firmware *pnvm; char pnvm_name[64]; - int ret; /* * The prefix unfortunately includes a hyphen at the end, so @@ -264,6 +264,11 @@ int iwl_pnvm_load(struct iwl_trans *trans, release_firmware(pnvm); } + } else { + /* if we already loaded, we need to set it again */ + ret = iwl_trans_set_pnvm(trans, NULL, 0); + if (ret) + return ret; } iwl_init_notification_wait(notif_wait, &pnvm_wait, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 86e1d57df65e..c72d23d54d90 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -418,6 +418,7 @@ struct iwl_cfg { #define IWL_CFG_MAC_TYPE_QU 0x33 #define IWL_CFG_MAC_TYPE_QUZ 0x35 #define IWL_CFG_MAC_TYPE_QNJ 0x36 +#define IWL_CFG_MAC_TYPE_SO 0x37 #define IWL_CFG_MAC_TYPE_SNJ 0x42 #define IWL_CFG_MAC_TYPE_MA 0x44 @@ -604,6 +605,8 @@ extern const struct iwl_cfg iwlax201_cfg_snj_hr_b0; extern const struct iwl_cfg iwl_cfg_ma_a0_gf_a0; extern const struct iwl_cfg iwl_cfg_ma_a0_mr_a0; extern const struct iwl_cfg iwl_cfg_snj_a0_mr_a0; +extern const struct iwl_cfg iwl_cfg_so_a0_hr_a0; +extern const struct iwl_cfg iwl_cfg_quz_a0_hr_b0; #endif /* CONFIG_IWLMVM */ #endif /* __IWL_CONFIG_H__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 313e9f106f46..4c5609cdcbde 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -859,12 +859,10 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) if (cmd_ver == 3) { len = sizeof(cmd.v3); n_bands = ARRAY_SIZE(cmd.v3.table[0]); - cmd.v3.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); } else if (fw_has_api(&mvm->fwrt.fw->ucode_capa, IWL_UCODE_TLV_API_SAR_TABLE_VER)) { len = sizeof(cmd.v2); n_bands = ARRAY_SIZE(cmd.v2.table[0]); - cmd.v2.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); } else { len = sizeof(cmd.v1); n_bands = ARRAY_SIZE(cmd.v1.table[0]); @@ -884,6 +882,16 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) if (ret) return 0; + /* + * Set the revision on versions that contain it. + * This must be done after calling iwl_sar_geo_init(). + */ + if (cmd_ver == 3) + cmd.v3.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); + else if (fw_has_api(&mvm->fwrt.fw->ucode_capa, + IWL_UCODE_TLV_API_SAR_TABLE_VER)) + cmd.v2.table_revision = cpu_to_le32(mvm->fwrt.geo_rev); + return iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(PHY_OPS_GROUP, GEO_TX_POWER_LIMIT), 0, len, &cmd); @@ -892,7 +900,6 @@ static int iwl_mvm_sar_geo_init(struct iwl_mvm *mvm) static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm) { union acpi_object *wifi_pkg, *data, *enabled; - union iwl_ppag_table_cmd ppag_table; int i, j, ret, tbl_rev, num_sub_bands; int idx = 2; s8 *gain; @@ -946,8 +953,8 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm) goto out_free; } - ppag_table.v1.enabled = cpu_to_le32(enabled->integer.value); - if (!ppag_table.v1.enabled) { + mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(enabled->integer.value); + if (!mvm->fwrt.ppag_table.v1.enabled) { ret = 0; goto out_free; } @@ -962,16 +969,23 @@ static int iwl_mvm_get_ppag_table(struct iwl_mvm *mvm) union acpi_object *ent; ent = &wifi_pkg->package.elements[idx++]; - if (ent->type != ACPI_TYPE_INTEGER || - (j == 0 && ent->integer.value > ACPI_PPAG_MAX_LB) || - (j == 0 && ent->integer.value < ACPI_PPAG_MIN_LB) || - (j != 0 && ent->integer.value > ACPI_PPAG_MAX_HB) || - (j != 0 && ent->integer.value < ACPI_PPAG_MIN_HB)) { - ppag_table.v1.enabled = cpu_to_le32(0); + if (ent->type != ACPI_TYPE_INTEGER) { ret = -EINVAL; goto out_free; } + gain[i * num_sub_bands + j] = ent->integer.value; + + if ((j == 0 && + (gain[i * num_sub_bands + j] > ACPI_PPAG_MAX_LB || + gain[i * num_sub_bands + j] < ACPI_PPAG_MIN_LB)) || + (j != 0 && + (gain[i * num_sub_bands + j] > ACPI_PPAG_MAX_HB || + gain[i * num_sub_bands + j] < ACPI_PPAG_MIN_HB))) { + mvm->fwrt.ppag_table.v1.enabled = cpu_to_le32(0); + ret = -EINVAL; + goto out_free; + } } } ret = 0; @@ -984,7 +998,6 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) { u8 cmd_ver; int i, j, ret, num_sub_bands, cmd_size; - union iwl_ppag_table_cmd ppag_table; s8 *gain; if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_SET_PPAG)) { @@ -1003,7 +1016,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) if (cmd_ver == 1) { num_sub_bands = IWL_NUM_SUB_BANDS; gain = mvm->fwrt.ppag_table.v1.gain[0]; - cmd_size = sizeof(ppag_table.v1); + cmd_size = sizeof(mvm->fwrt.ppag_table.v1); if (mvm->fwrt.ppag_ver == 2) { IWL_DEBUG_RADIO(mvm, "PPAG table is v2 but FW supports v1, sending truncated table\n"); @@ -1011,7 +1024,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) } else if (cmd_ver == 2) { num_sub_bands = IWL_NUM_SUB_BANDS_V2; gain = mvm->fwrt.ppag_table.v2.gain[0]; - cmd_size = sizeof(ppag_table.v2); + cmd_size = sizeof(mvm->fwrt.ppag_table.v2); if (mvm->fwrt.ppag_ver == 1) { IWL_DEBUG_RADIO(mvm, "PPAG table is v1 but FW supports v2, sending padded table\n"); @@ -1031,7 +1044,7 @@ int iwl_mvm_ppag_send_cmd(struct iwl_mvm *mvm) IWL_DEBUG_RADIO(mvm, "Sending PER_PLATFORM_ANT_GAIN_CMD\n"); ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(PHY_OPS_GROUP, PER_PLATFORM_ANT_GAIN_CMD), - 0, cmd_size, &ppag_table); + 0, cmd_size, &mvm->fwrt.ppag_table); if (ret < 0) IWL_ERR(mvm, "failed to send PER_PLATFORM_ANT_GAIN_CMD (%d)\n", ret); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 4e1bdf13e5e7..0b012f8c9eb2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -999,9 +999,6 @@ void iwl_mvm_remove_csa_period(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); - if (!te_data->running) - return; - spin_lock_bh(&mvm->time_event_lock); id = te_data->id; spin_unlock_bh(&mvm->time_event_lock); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index 5b5134dd49af..8fba190e84cf 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -298,17 +298,20 @@ int iwl_trans_pcie_ctx_info_gen3_set_pnvm(struct iwl_trans *trans, if (trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210) return 0; - ret = iwl_pcie_ctxt_info_alloc_dma(trans, data, len, - &trans_pcie->pnvm_dram); - if (ret < 0) { - IWL_DEBUG_FW(trans, "Failed to allocate PNVM DMA %d.\n", - ret); - return ret; + /* only allocate the DRAM if not allocated yet */ + if (!trans->pnvm_loaded) { + if (WARN_ON(prph_sc_ctrl->pnvm_cfg.pnvm_size)) + return -EBUSY; + + ret = iwl_pcie_ctxt_info_alloc_dma(trans, data, len, + &trans_pcie->pnvm_dram); + if (ret < 0) { + IWL_DEBUG_FW(trans, "Failed to allocate PNVM DMA %d.\n", + ret); + return ret; + } } - if (WARN_ON(prph_sc_ctrl->pnvm_cfg.pnvm_size)) - return -EBUSY; - prph_sc_ctrl->pnvm_cfg.pnvm_base_addr = cpu_to_le64(trans_pcie->pnvm_dram.physical); prph_sc_ctrl->pnvm_cfg.pnvm_size = diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index ed3f5b7aa71e..c55faa388948 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -934,6 +934,11 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, IWL_CFG_ANY, IWL_CFG_ANY, iwl_quz_a0_hr1_b0, iwl_ax101_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_QUZ, SILICON_B_STEP, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_NO_160, IWL_CFG_ANY, + iwl_cfg_quz_a0_hr_b0, iwl_ax203_name), /* Ma */ _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, @@ -952,6 +957,27 @@ static const struct iwl_dev_info iwl_dev_info_table[] = { IWL_CFG_ANY, IWL_CFG_ANY, iwl_cfg_snj_a0_mr_a0, iwl_ma_name), +/* So with Hr */ + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_NO_160, IWL_CFG_ANY, + iwl_cfg_so_a0_hr_a0, iwl_ax203_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_NO_160, IWL_CFG_ANY, + iwl_cfg_so_a0_hr_a0, iwl_ax203_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY, + IWL_CFG_160, IWL_CFG_ANY, + iwl_cfg_so_a0_hr_a0, iwl_ax101_name), + _IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY, + IWL_CFG_MAC_TYPE_SO, IWL_CFG_ANY, + IWL_CFG_RF_TYPE_HR2, IWL_CFG_ANY, + IWL_CFG_160, IWL_CFG_ANY, + iwl_cfg_so_a0_hr_a0, iwl_ax201_name) #endif /* CONFIG_IWLMVM */ }; diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index e81dfaf99bcb..680c899a96d7 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -345,7 +345,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, }; struct ieee80211_hw *hw; int len, n = 0, ret = -ENOMEM; - struct mt76_queue_entry e; struct mt76_txwi_cache *t; struct sk_buff *iter; dma_addr_t addr; @@ -387,6 +386,11 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, } tx_info.nbuf = n; + if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) { + ret = -ENOMEM; + goto unmap; + } + dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size, DMA_TO_DEVICE); ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info); @@ -395,11 +399,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, if (ret < 0) goto unmap; - if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) { - ret = -ENOMEM; - goto unmap; - } - return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf, tx_info.info, tx_info.skb, t); @@ -415,9 +414,7 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q, dev->test.tx_done--; #endif - e.skb = tx_info.skb; - e.txwi = t; - dev->drv->tx_complete_skb(dev, &e); + dev_kfree_skb(tx_info.skb); mt76_put_txwi(dev, t); return ret; } @@ -511,13 +508,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, { struct sk_buff *skb = q->rx_head; struct skb_shared_info *shinfo = skb_shinfo(skb); + int nr_frags = shinfo->nr_frags; - if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) { + if (nr_frags < ARRAY_SIZE(shinfo->frags)) { struct page *page = virt_to_head_page(data); int offset = data - page_address(page) + q->buf_offset; - skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len, - q->buf_size); + skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size); } else { skb_free_frag(data); } @@ -526,7 +523,10 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data, return; q->rx_head = NULL; - dev->drv->rx_skb(dev, q - dev->q_rx, skb); + if (nr_frags < ARRAY_SIZE(shinfo->frags)) + dev->drv->rx_skb(dev, q - dev->q_rx, skb); + else + dev_kfree_skb(skb); } static int diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c index 0f360be0b885..fb10a6497ed0 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c @@ -2058,6 +2058,23 @@ void mt7615_dma_reset(struct mt7615_dev *dev) } EXPORT_SYMBOL_GPL(mt7615_dma_reset); +void mt7615_tx_token_put(struct mt7615_dev *dev) +{ + struct mt76_txwi_cache *txwi; + int id; + + spin_lock_bh(&dev->token_lock); + idr_for_each_entry(&dev->token, txwi, id) { + mt7615_txp_skb_unmap(&dev->mt76, txwi); + if (txwi->skb) + dev_kfree_skb_any(txwi->skb); + mt76_put_txwi(&dev->mt76, txwi); + } + spin_unlock_bh(&dev->token_lock); + idr_destroy(&dev->token); +} +EXPORT_SYMBOL_GPL(mt7615_tx_token_put); + void mt7615_mac_reset_work(struct work_struct *work) { struct mt7615_phy *phy2; @@ -2101,6 +2118,9 @@ void mt7615_mac_reset_work(struct work_struct *work) mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_PDMA_STOPPED); + mt7615_tx_token_put(dev); + idr_init(&dev->token); + if (mt7615_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) { mt7615_dma_reset(dev); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h index 99b8abdbb08f..d697ff2ea56e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h +++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h @@ -583,7 +583,7 @@ int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct mt76_tx_info *tx_info); void mt7615_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e); - +void mt7615_tx_token_put(struct mt7615_dev *dev); void mt7615_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q, struct sk_buff *skb); void mt7615_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps); diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c index 27fcb1374685..58a0ec1bf8d7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c @@ -160,9 +160,7 @@ int mt7615_register_device(struct mt7615_dev *dev) void mt7615_unregister_device(struct mt7615_dev *dev) { - struct mt76_txwi_cache *txwi; bool mcu_running; - int id; mcu_running = mt7615_wait_for_mcu_init(dev); @@ -172,15 +170,7 @@ void mt7615_unregister_device(struct mt7615_dev *dev) mt7615_mcu_exit(dev); mt7615_dma_cleanup(dev); - spin_lock_bh(&dev->token_lock); - idr_for_each_entry(&dev->token, txwi, id) { - mt7615_txp_skb_unmap(&dev->mt76, txwi); - if (txwi->skb) - dev_kfree_skb_any(txwi->skb); - mt76_put_txwi(&dev->mt76, txwi); - } - spin_unlock_bh(&dev->token_lock); - idr_destroy(&dev->token); + mt7615_tx_token_put(dev); tasklet_disable(&dev->irq_tasklet); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c index 102a8f14c22d..2ec18aaa8280 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c @@ -672,28 +672,12 @@ int mt7915_register_device(struct mt7915_dev *dev) void mt7915_unregister_device(struct mt7915_dev *dev) { - struct mt76_txwi_cache *txwi; - int id; - mt7915_unregister_ext_phy(dev); mt76_unregister_device(&dev->mt76); mt7915_mcu_exit(dev); mt7915_dma_cleanup(dev); - spin_lock_bh(&dev->token_lock); - idr_for_each_entry(&dev->token, txwi, id) { - mt7915_txp_skb_unmap(&dev->mt76, txwi); - if (txwi->skb) { - struct ieee80211_hw *hw; - - hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb); - ieee80211_free_txskb(hw, txwi->skb); - } - mt76_put_txwi(&dev->mt76, txwi); - dev->token_count--; - } - spin_unlock_bh(&dev->token_lock); - idr_destroy(&dev->token); + mt7915_tx_token_put(dev); mt76_free_device(&dev->mt76); } diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c index f504eeb221f9..c9dd6867e125 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c @@ -957,11 +957,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, } txp->nbuf = nbuf; - /* pass partial skb header to fw */ - tx_info->buf[1].len = MT_CT_PARSE_LEN; - tx_info->buf[1].skip_unmap = true; - tx_info->nbuf = MT_CT_DMA_BUF_NUM; - txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD | MT_CT_INFO_FROM_HOST); if (!key) @@ -999,6 +994,11 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, txp->rept_wds_wcid = cpu_to_le16(0x3ff); tx_info->skb = DMA_DUMMY_DATA; + /* pass partial skb header to fw */ + tx_info->buf[1].len = MT_CT_PARSE_LEN; + tx_info->buf[1].skip_unmap = true; + tx_info->nbuf = MT_CT_DMA_BUF_NUM; + return 0; } @@ -1485,6 +1485,27 @@ mt7915_dma_reset(struct mt7915_phy *phy) MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN); } +void mt7915_tx_token_put(struct mt7915_dev *dev) +{ + struct mt76_txwi_cache *txwi; + int id; + + spin_lock_bh(&dev->token_lock); + idr_for_each_entry(&dev->token, txwi, id) { + mt7915_txp_skb_unmap(&dev->mt76, txwi); + if (txwi->skb) { + struct ieee80211_hw *hw; + + hw = mt76_tx_status_get_hw(&dev->mt76, txwi->skb); + ieee80211_free_txskb(hw, txwi->skb); + } + mt76_put_txwi(&dev->mt76, txwi); + dev->token_count--; + } + spin_unlock_bh(&dev->token_lock); + idr_destroy(&dev->token); +} + /* system error recovery */ void mt7915_mac_reset_work(struct work_struct *work) { @@ -1525,6 +1546,9 @@ void mt7915_mac_reset_work(struct work_struct *work) mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED); + mt7915_tx_token_put(dev); + idr_init(&dev->token); + if (mt7915_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) { mt7915_dma_reset(&dev->phy); diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h index 0339abf360d3..94bed8a3a050 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h @@ -463,6 +463,7 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr, struct ieee80211_sta *sta, struct mt76_tx_info *tx_info); void mt7915_tx_complete_skb(struct mt76_dev *mdev, struct mt76_queue_entry *e); +void mt7915_tx_token_put(struct mt7915_dev *dev); int mt7915_init_tx_queues(struct mt7915_phy *phy, int idx, int n_desc); void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q, struct sk_buff *skb); diff --git a/drivers/net/wireless/microchip/wilc1000/netdev.c b/drivers/net/wireless/microchip/wilc1000/netdev.c index 2a1fbbdd6a4b..0c188310919e 100644 --- a/drivers/net/wireless/microchip/wilc1000/netdev.c +++ b/drivers/net/wireless/microchip/wilc1000/netdev.c @@ -737,7 +737,7 @@ netdev_tx_t wilc_mac_xmit(struct sk_buff *skb, struct net_device *ndev) vif->netstats.tx_packets++; vif->netstats.tx_bytes += tx_data->size; - queue_count = wilc_wlan_txq_add_net_pkt(ndev, (void *)tx_data, + queue_count = wilc_wlan_txq_add_net_pkt(ndev, tx_data, tx_data->buff, tx_data->size, wilc_tx_complete); diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.c b/drivers/net/wireless/microchip/wilc1000/wlan.c index c12f27be9f79..31d51385ba93 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.c +++ b/drivers/net/wireless/microchip/wilc1000/wlan.c @@ -408,7 +408,8 @@ static inline u8 ac_change(struct wilc *wilc, u8 *ac) return 1; } -int wilc_wlan_txq_add_net_pkt(struct net_device *dev, void *priv, u8 *buffer, +int wilc_wlan_txq_add_net_pkt(struct net_device *dev, + struct tx_complete_data *tx_data, u8 *buffer, u32 buffer_size, void (*tx_complete_fn)(void *, int)) { @@ -420,27 +421,27 @@ int wilc_wlan_txq_add_net_pkt(struct net_device *dev, void *priv, u8 *buffer, wilc = vif->wilc; if (wilc->quit) { - tx_complete_fn(priv, 0); + tx_complete_fn(tx_data, 0); return 0; } tqe = kmalloc(sizeof(*tqe), GFP_ATOMIC); if (!tqe) { - tx_complete_fn(priv, 0); + tx_complete_fn(tx_data, 0); return 0; } tqe->type = WILC_NET_PKT; tqe->buffer = buffer; tqe->buffer_size = buffer_size; tqe->tx_complete_func = tx_complete_fn; - tqe->priv = priv; + tqe->priv = tx_data; tqe->vif = vif; - q_num = ac_classify(wilc, priv); + q_num = ac_classify(wilc, tx_data->skb); tqe->q_num = q_num; if (ac_change(wilc, &q_num)) { - tx_complete_fn(priv, 0); + tx_complete_fn(tx_data, 0); kfree(tqe); return 0; } @@ -451,7 +452,7 @@ int wilc_wlan_txq_add_net_pkt(struct net_device *dev, void *priv, u8 *buffer, tcp_process(dev, tqe); wilc_wlan_txq_add_to_tail(dev, q_num, tqe); } else { - tx_complete_fn(priv, 0); + tx_complete_fn(tx_data, 0); kfree(tqe); } diff --git a/drivers/net/wireless/microchip/wilc1000/wlan.h b/drivers/net/wireless/microchip/wilc1000/wlan.h index 3d2104f19819..d55eb6b3a12a 100644 --- a/drivers/net/wireless/microchip/wilc1000/wlan.h +++ b/drivers/net/wireless/microchip/wilc1000/wlan.h @@ -399,7 +399,8 @@ int wilc_wlan_firmware_download(struct wilc *wilc, const u8 *buffer, u32 buffer_size); int wilc_wlan_start(struct wilc *wilc); int wilc_wlan_stop(struct wilc *wilc, struct wilc_vif *vif); -int wilc_wlan_txq_add_net_pkt(struct net_device *dev, void *priv, u8 *buffer, +int wilc_wlan_txq_add_net_pkt(struct net_device *dev, + struct tx_complete_data *tx_data, u8 *buffer, u32 buffer_size, void (*tx_complete_fn)(void *, int)); int wilc_wlan_handle_txq(struct wilc *wl, u32 *txq_count); diff --git a/drivers/net/wireless/ti/wl12xx/main.c b/drivers/net/wireless/ti/wl12xx/main.c index 3c9c623bb428..9d7dbfe7fe0c 100644 --- a/drivers/net/wireless/ti/wl12xx/main.c +++ b/drivers/net/wireless/ti/wl12xx/main.c @@ -635,7 +635,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_LEGACY_NVS | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wl->sr_fw_name = WL127X_FW_NAME_SINGLE; wl->mr_fw_name = WL127X_FW_NAME_MULTI; @@ -659,7 +658,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_LEGACY_NVS | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wl->plt_fw_name = WL127X_PLT_FW_NAME; wl->sr_fw_name = WL127X_FW_NAME_SINGLE; @@ -688,7 +686,6 @@ static int wl12xx_identify_chip(struct wl1271 *wl) wl->quirks |= WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN | WLCORE_QUIRK_DUAL_PROBE_TMPL | WLCORE_QUIRK_TKIP_HEADER_SPACE | - WLCORE_QUIRK_START_STA_FAILS | WLCORE_QUIRK_AP_ZERO_SESSION_ID; wlcore_set_min_fw_ver(wl, WL128X_CHIP_VER, diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 122c7a4b374f..0f9cc3de6aeb 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2872,21 +2872,8 @@ static int wlcore_join(struct wl1271 *wl, struct wl12xx_vif *wlvif) if (is_ibss) ret = wl12xx_cmd_role_start_ibss(wl, wlvif); - else { - if (wl->quirks & WLCORE_QUIRK_START_STA_FAILS) { - /* - * TODO: this is an ugly workaround for wl12xx fw - * bug - we are not able to tx/rx after the first - * start_sta, so make dummy start+stop calls, - * and then call start_sta again. - * this should be fixed in the fw. - */ - wl12xx_cmd_role_start_sta(wl, wlvif); - wl12xx_cmd_role_stop_sta(wl, wlvif); - } - + else ret = wl12xx_cmd_role_start_sta(wl, wlvif); - } return ret; } diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h index b7821311ac75..81c94d390623 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore.h +++ b/drivers/net/wireless/ti/wlcore/wlcore.h @@ -547,9 +547,6 @@ wlcore_set_min_fw_ver(struct wl1271 *wl, unsigned int chip, /* Each RX/TX transaction requires an end-of-transaction transfer */ #define WLCORE_QUIRK_END_OF_TRANSACTION BIT(0) -/* the first start_role(sta) sometimes doesn't work on wl12xx */ -#define WLCORE_QUIRK_START_STA_FAILS BIT(1) - /* wl127x and SPI don't support SDIO block size alignment */ #define WLCORE_QUIRK_TX_BLOCKSIZE_ALIGN BIT(2) diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index acb786d8b1d8..e02a4fbb74de 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -162,13 +162,15 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) { struct xenvif_queue *queue = dev_id; int old; + bool has_rx, has_tx; old = atomic_fetch_or(NETBK_COMMON_EOI, &queue->eoi_pending); WARN(old, "Interrupt while EOI pending\n"); - /* Use bitwise or as we need to call both functions. */ - if ((!xenvif_handle_tx_interrupt(queue) | - !xenvif_handle_rx_interrupt(queue))) { + has_tx = xenvif_handle_tx_interrupt(queue); + has_rx = xenvif_handle_rx_interrupt(queue); + + if (!has_rx && !has_tx) { atomic_andnot(NETBK_COMMON_EOI, &queue->eoi_pending); xen_irq_lateeoi(irq, XEN_EOI_FLAG_SPURIOUS); } diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index bc3421d14576..986b56970961 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -1347,7 +1347,15 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget) NULL, queue->pages_to_map, nr_mops); - BUG_ON(ret); + if (ret) { + unsigned int i; + + netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n", + nr_mops, ret); + for (i = 0; i < nr_mops; ++i) + WARN_ON_ONCE(queue->tx_map_ops[i].status == + GNTST_okay); + } } work_done = xenvif_tx_submit(queue); diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f13eb4ded95f..6199bce5d3a4 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -366,11 +366,32 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) return true; nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; + nvme_req(req)->flags |= NVME_REQ_CANCELLED; blk_mq_complete_request(req); return true; } EXPORT_SYMBOL_GPL(nvme_cancel_request); +void nvme_cancel_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->tagset) { + blk_mq_tagset_busy_iter(ctrl->tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_tagset); + +void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl) +{ + if (ctrl->admin_tagset) { + blk_mq_tagset_busy_iter(ctrl->admin_tagset, + nvme_cancel_request, ctrl); + blk_mq_tagset_wait_completed_request(ctrl->admin_tagset); + } +} +EXPORT_SYMBOL_GPL(nvme_cancel_admin_tagset); + bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { @@ -1405,7 +1426,7 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid, goto out_free_id; } - error = -ENODEV; + error = NVME_SC_INVALID_NS | NVME_SC_DNR; if ((*id)->ncap == 0) /* namespace not allocated or attached */ goto out_free_id; @@ -1928,30 +1949,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(queue, UINT_MAX); } -static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) +/* + * Even though NVMe spec explicitly states that MDTS is not applicable to the + * write-zeroes, we are cautious and limit the size to the controllers + * max_hw_sectors value, which is based on the MDTS field and possibly other + * limiting factors. + */ +static void nvme_config_write_zeroes(struct request_queue *q, + struct nvme_ctrl *ctrl) { - u64 max_blocks; - - if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || - (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) - return; - /* - * Even though NVMe spec explicitly states that MDTS is not - * applicable to the write-zeroes:- "The restriction does not apply to - * commands that do not transfer data between the host and the - * controller (e.g., Write Uncorrectable ro Write Zeroes command).". - * In order to be more cautious use controller's max_hw_sectors value - * to configure the maximum sectors for the write-zeroes which is - * configured based on the controller's MDTS field in the - * nvme_init_identify() if available. - */ - if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_blocks = (u64)USHRT_MAX + 1; - else - max_blocks = ns->ctrl->max_hw_sectors + 1; - - blk_queue_max_write_zeroes_sectors(disk->queue, - nvme_lba_to_sect(ns, max_blocks)); + if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) && + !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) + blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors); } static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids) @@ -2123,7 +2132,7 @@ static void nvme_update_disk_info(struct gendisk *disk, set_capacity_and_notify(disk, capacity); nvme_config_discard(disk, ns); - nvme_config_write_zeroes(disk, ns); + nvme_config_write_zeroes(disk->queue, ns->ctrl); if ((id->nsattr & NVME_NS_ATTR_RO) || test_bit(NVME_NS_FORCE_RO, &ns->flags)) @@ -4003,7 +4012,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) { struct nvme_id_ns *id; - int ret = -ENODEV; + int ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (test_bit(NVME_NS_DEAD, &ns->flags)) goto out; @@ -4012,7 +4021,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) if (ret) goto out; - ret = -ENODEV; + ret = NVME_SC_INVALID_NS | NVME_SC_DNR; if (!nvme_ns_ids_equal(&ns->head->ids, ids)) { dev_err(ns->ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); @@ -4030,7 +4039,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids) * * TODO: we should probably schedule a delayed retry here. */ - if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR))) + if (ret > 0 && (ret & NVME_SC_DNR)) nvme_ns_remove(ns); } @@ -4060,6 +4069,12 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) nsid); break; } + if (!nvme_multi_css(ctrl)) { + dev_warn(ctrl->device, + "command set not reported for nsid: %d\n", + nsid); + break; + } nvme_alloc_ns(ctrl, nsid, &ids); break; default: diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 5f36cfa8136c..ca75338f2367 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1956,7 +1956,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) sizeof(op->rsp_iu), DMA_FROM_DEVICE); if (opstate == FCPOP_STATE_ABORTED) - status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); + status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1); else if (freq->status) { status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); dev_info(ctrl->ctrl.device, @@ -2055,7 +2055,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) nvme_fc_complete_rq(rq); check_error: - if (terminate_assoc) + if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING) queue_work(nvme_reset_wq, &ctrl->ioerr_work); } @@ -2443,6 +2443,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + op->nreq.flags |= NVME_REQ_CANCELLED; __nvme_fc_abort_op(ctrl, op); return true; } diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 282b7a4ea9a9..fdfc18a222cc 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -677,6 +677,10 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_stable_writes(ns->queue) && ns->head->disk) blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES, ns->head->disk->queue); +#ifdef CONFIG_BLK_DEV_ZONED + if (blk_queue_is_zoned(ns->queue) && ns->head->disk) + ns->head->disk->queue->nr_zones = ns->queue->nr_zones; +#endif } void nvme_mpath_remove_disk(struct nvme_ns_head *head) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 88a6b97247f5..a72f07181091 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -576,6 +576,8 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); +void nvme_cancel_tagset(struct nvme_ctrl *ctrl); +void nvme_cancel_admin_tagset(struct nvme_ctrl *ctrl); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state); bool nvme_wait_reset(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 6bad4d4dcdf0..514dfd630035 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3230,7 +3230,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ - .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_NO_NS_DESC_LIST, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ @@ -3241,9 +3242,13 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, { PCI_DEVICE(0x144d, 0xa822), /* Samsung PM1725a */ .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY | + NVME_QUIRK_DISABLE_WRITE_ZEROES| NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */ + .driver_data = NVME_QUIRK_NO_NS_DESC_LIST | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE(0x1d1d, 0x1f1f), /* LighNVM qemu device */ .driver_data = NVME_QUIRK_LIGHTNVM, }, { PCI_DEVICE(0x1d1d, 0x2807), /* CNEX WL */ @@ -3261,6 +3266,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, { PCI_DEVICE(0x1d97, 0x2263), /* SPCC */ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, }, + { PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */ + .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, }, { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001), diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b7ce4f221d99..0c3da10c1f29 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -736,8 +736,11 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) return ret; ctrl->ctrl.queue_count = nr_io_queues + 1; - if (ctrl->ctrl.queue_count < 2) - return 0; + if (ctrl->ctrl.queue_count < 2) { + dev_err(ctrl->ctrl.device, + "unable to set any I/O queues\n"); + return -ENOMEM; + } dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n", nr_io_queues); @@ -919,12 +922,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, error = nvme_init_identify(&ctrl->ctrl); if (error) - goto out_stop_queue; + goto out_quiesce_queue; return 0; +out_quiesce_queue: + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); out_stop_queue: nvme_rdma_stop_queue(&ctrl->queues[0]); + nvme_cancel_admin_tagset(&ctrl->ctrl); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->ctrl.admin_q); @@ -1001,8 +1008,10 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) out_wait_freeze_timed_out: nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); out_cleanup_connect_q: + nvme_cancel_tagset(&ctrl->ctrl); if (new) blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: @@ -1144,10 +1153,18 @@ static int nvme_rdma_setup_ctrl(struct nvme_rdma_ctrl *ctrl, bool new) return 0; destroy_io: - if (ctrl->ctrl.queue_count > 1) + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + nvme_cancel_tagset(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, new); + } destroy_admin: + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); + nvme_cancel_admin_tagset(&ctrl->ctrl); nvme_rdma_destroy_admin_queue(ctrl, new); return ret; } diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 881d28eb15e9..c6958e5bc91d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -287,7 +287,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, * directly, otherwise queue io_work. Also, only do that if we * are on the same cpu, so we don't introduce contention. */ - if (queue->io_cpu == __smp_processor_id() && + if (queue->io_cpu == raw_smp_processor_id() && sync && empty && mutex_trylock(&queue->send_mutex)) { queue->more_requests = !last; nvme_tcp_send_all(queue); @@ -568,6 +568,13 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req, req->pdu_len = le32_to_cpu(pdu->r2t_length); req->pdu_sent = 0; + if (unlikely(!req->pdu_len)) { + dev_err(queue->ctrl->ctrl.device, + "req %d r2t len is %u, probably a bug...\n", + rq->tag, req->pdu_len); + return -EPROTO; + } + if (unlikely(req->data_sent + req->pdu_len > req->data_len)) { dev_err(queue->ctrl->ctrl.device, "req %d r2t len %u exceeded data len %u (%zu sent)\n", @@ -1748,8 +1755,11 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) return ret; ctrl->queue_count = nr_io_queues + 1; - if (ctrl->queue_count < 2) - return 0; + if (ctrl->queue_count < 2) { + dev_err(ctrl->device, + "unable to set any I/O queues\n"); + return -ENOMEM; + } dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); @@ -1815,8 +1825,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) out_wait_freeze_timed_out: nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); out_cleanup_connect_q: + nvme_cancel_tagset(ctrl); if (new) blk_cleanup_queue(ctrl->connect_q); out_free_tag_set: @@ -1878,12 +1890,16 @@ static int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) error = nvme_init_identify(ctrl); if (error) - goto out_stop_queue; + goto out_quiesce_queue; return 0; +out_quiesce_queue: + blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); out_stop_queue: nvme_tcp_stop_queue(ctrl, 0); + nvme_cancel_admin_tagset(ctrl); out_cleanup_queue: if (new) blk_cleanup_queue(ctrl->admin_q); @@ -2003,10 +2019,18 @@ static int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) return 0; destroy_io: - if (ctrl->queue_count > 1) + if (ctrl->queue_count > 1) { + nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); + nvme_tcp_stop_io_queues(ctrl); + nvme_cancel_tagset(ctrl); nvme_tcp_destroy_io_queues(ctrl, new); + } destroy_admin: + blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); + nvme_cancel_admin_tagset(ctrl); nvme_tcp_destroy_admin_queue(ctrl, new); return ret; } diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dc1ea468b182..1827d8d8f3b0 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -469,7 +469,6 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) static void nvmet_execute_identify_ns(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -486,20 +485,21 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } /* return an all zeroed buffer if we can't find an active namespace */ - ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); - if (!ns) { - status = NVME_SC_INVALID_NS; + req->ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); + if (!req->ns) { + status = 0; goto done; } - nvmet_ns_revalidate(ns); + nvmet_ns_revalidate(req->ns); /* * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. */ - id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift); - switch (req->port->ana_state[ns->anagrpid]) { + id->ncap = id->nsze = + cpu_to_le64(req->ns->size >> req->ns->blksize_shift); + switch (req->port->ana_state[req->ns->anagrpid]) { case NVME_ANA_INACCESSIBLE: case NVME_ANA_PERSISTENT_LOSS: break; @@ -508,8 +508,8 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) break; } - if (ns->bdev) - nvmet_bdev_set_limits(ns->bdev, id); + if (req->ns->bdev) + nvmet_bdev_set_limits(req->ns->bdev, id); /* * We just provide a single LBA format that matches what the @@ -523,25 +523,24 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) * controllers, but also with any other user of the block device. */ id->nmic = (1 << 0); - id->anagrpid = cpu_to_le32(ns->anagrpid); + id->anagrpid = cpu_to_le32(req->ns->anagrpid); - memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid)); + memcpy(&id->nguid, &req->ns->nguid, sizeof(id->nguid)); - id->lbaf[0].ds = ns->blksize_shift; + id->lbaf[0].ds = req->ns->blksize_shift; - if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + if (ctrl->pi_support && nvmet_ns_has_pi(req->ns)) { id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | NVME_NS_DPC_PI_TYPE3; id->mc = NVME_MC_EXTENDED_LBA; - id->dps = ns->pi_type; + id->dps = req->ns->pi_type; id->flbas = NVME_NS_FLBAS_META_EXT; - id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); + id->lbaf[0].ms = cpu_to_le16(req->ns->metadata_size); } - if (ns->readonly) + if (req->ns->readonly) id->nsattr |= (1 << 0); - nvmet_put_namespace(ns); done: if (!status) status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id)); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 8ce4d59cc9e7..870d06cfd815 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1107,9 +1107,20 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) { lockdep_assert_held(&ctrl->lock); - if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || - nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES || - nvmet_cc_mps(ctrl->cc) != 0 || + /* + * Only I/O controllers should verify iosqes,iocqes. + * Strictly speaking, the spec says a discovery controller + * should verify iosqes,iocqes are zeroed, however that + * would break backwards compatibility, so don't enforce it. + */ + if (ctrl->subsys->type != NVME_NQN_DISC && + (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES || + nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) { + ctrl->csts = NVME_CSTS_CFS; + return; + } + + if (nvmet_cc_mps(ctrl->cc) != 0 || nvmet_cc_ams(ctrl->cc) != 0 || nvmet_cc_css(ctrl->cc) != 0) { ctrl->csts = NVME_CSTS_CFS; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 06b6b742bb21..6c1f3ab7649c 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -802,9 +802,8 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { - pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", - wc->wr_cqe, ib_wc_status_msg(wc->status), - wc->status); + pr_info("RDMA WRITE for CQE failed with status %s (%d).\n", + ib_wc_status_msg(wc->status), wc->status); nvmet_rdma_error_comp(queue); } return; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index aacf06f0b431..8b0485ada315 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -379,7 +379,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) return NVME_SC_INTERNAL; } -static void nvmet_tcp_ddgst(struct ahash_request *hash, +static void nvmet_tcp_send_ddgst(struct ahash_request *hash, struct nvmet_tcp_cmd *cmd) { ahash_request_set_crypt(hash, cmd->req.sg, @@ -387,6 +387,23 @@ static void nvmet_tcp_ddgst(struct ahash_request *hash, crypto_ahash_digest(hash); } +static void nvmet_tcp_recv_ddgst(struct ahash_request *hash, + struct nvmet_tcp_cmd *cmd) +{ + struct scatterlist sg; + struct kvec *iov; + int i; + + crypto_ahash_init(hash); + for (i = 0, iov = cmd->iov; i < cmd->nr_mapped; i++, iov++) { + sg_init_one(&sg, iov->iov_base, iov->iov_len); + ahash_request_set_crypt(hash, &sg, NULL, iov->iov_len); + crypto_ahash_update(hash); + } + ahash_request_set_crypt(hash, NULL, (void *)&cmd->exp_ddgst, 0); + crypto_ahash_final(hash); +} + static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_data_pdu *pdu = cmd->data_pdu; @@ -411,7 +428,7 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) if (queue->data_digest) { pdu->hdr.flags |= NVME_TCP_F_DDGST; - nvmet_tcp_ddgst(queue->snd_hash, cmd); + nvmet_tcp_send_ddgst(queue->snd_hash, cmd); } if (cmd->queue->hdr_digest) { @@ -1060,7 +1077,7 @@ static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd) { struct nvmet_tcp_queue *queue = cmd->queue; - nvmet_tcp_ddgst(queue->rcv_hash, cmd); + nvmet_tcp_recv_ddgst(queue->rcv_hash, cmd); queue->offset = 0; queue->left = NVME_TCP_DIGEST_LENGTH; queue->rcv_state = NVMET_TCP_RECV_DDGST; @@ -1081,14 +1098,14 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) cmd->rbytes_done += ret; } + if (queue->data_digest) { + nvmet_tcp_prep_recv_ddgst(cmd); + return 0; + } nvmet_tcp_unmap_pdu_iovec(cmd); if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && cmd->rbytes_done == cmd->req.transfer_len) { - if (queue->data_digest) { - nvmet_tcp_prep_recv_ddgst(cmd); - return 0; - } cmd->req.execute(&cmd->req); } @@ -1468,17 +1485,27 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue) if (inet->rcv_tos > 0) ip_sock_set_tos(sock->sk, inet->rcv_tos); + ret = 0; write_lock_bh(&sock->sk->sk_callback_lock); - sock->sk->sk_user_data = queue; - queue->data_ready = sock->sk->sk_data_ready; - sock->sk->sk_data_ready = nvmet_tcp_data_ready; - queue->state_change = sock->sk->sk_state_change; - sock->sk->sk_state_change = nvmet_tcp_state_change; - queue->write_space = sock->sk->sk_write_space; - sock->sk->sk_write_space = nvmet_tcp_write_space; + if (sock->sk->sk_state != TCP_ESTABLISHED) { + /* + * If the socket is already closing, don't even start + * consuming it + */ + ret = -ENOTCONN; + } else { + sock->sk->sk_user_data = queue; + queue->data_ready = sock->sk->sk_data_ready; + sock->sk->sk_data_ready = nvmet_tcp_data_ready; + queue->state_change = sock->sk->sk_state_change; + sock->sk->sk_state_change = nvmet_tcp_state_change; + queue->write_space = sock->sk->sk_write_space; + sock->sk->sk_write_space = nvmet_tcp_write_space; + queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); + } write_unlock_bh(&sock->sk->sk_callback_lock); - return 0; + return ret; } static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, @@ -1526,8 +1553,6 @@ static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port, if (ret) goto out_destroy_sq; - queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &queue->io_work); - return 0; out_destroy_sq: mutex_lock(&nvmet_tcp_queue_mutex); diff --git a/drivers/nvmem/core.c b/drivers/nvmem/core.c index 177f5bf27c6d..a5ab1e0c74cf 100644 --- a/drivers/nvmem/core.c +++ b/drivers/nvmem/core.c @@ -682,7 +682,9 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) for_each_child_of_node(parent, child) { addr = of_get_property(child, "reg", &len); - if (!addr || (len < 2 * sizeof(u32))) { + if (!addr) + continue; + if (len < 2 * sizeof(u32)) { dev_err(dev, "nvmem: invalid reg on %pOF\n", child); return -EINVAL; } @@ -713,6 +715,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem) cell->name, nvmem->stride); /* Cells already added will be freed later. */ kfree_const(cell->name); + of_node_put(cell->np); kfree(cell); return -EINVAL; } diff --git a/drivers/nvmem/qcom-spmi-sdam.c b/drivers/nvmem/qcom-spmi-sdam.c index a72704cd0468..f6e9f96933ca 100644 --- a/drivers/nvmem/qcom-spmi-sdam.c +++ b/drivers/nvmem/qcom-spmi-sdam.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2017, 2020 The Linux Foundation. All rights reserved. + * Copyright (c) 2017, 2020-2021, The Linux Foundation. All rights reserved. */ #include @@ -18,7 +18,6 @@ #define SDAM_PBS_TRIG_CLR 0xE6 struct sdam_chip { - struct platform_device *pdev; struct regmap *regmap; struct nvmem_config sdam_config; unsigned int base; @@ -65,7 +64,7 @@ static int sdam_read(void *priv, unsigned int offset, void *val, size_t bytes) { struct sdam_chip *sdam = priv; - struct device *dev = &sdam->pdev->dev; + struct device *dev = sdam->sdam_config.dev; int rc; if (!sdam_is_valid(sdam, offset, bytes)) { @@ -86,7 +85,7 @@ static int sdam_write(void *priv, unsigned int offset, void *val, size_t bytes) { struct sdam_chip *sdam = priv; - struct device *dev = &sdam->pdev->dev; + struct device *dev = sdam->sdam_config.dev; int rc; if (!sdam_is_valid(sdam, offset, bytes)) { diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c index feb0f2d67fc5..dcc1dd96911a 100644 --- a/drivers/of/fdt.c +++ b/drivers/of/fdt.c @@ -1146,8 +1146,16 @@ int __init __weak early_init_dt_mark_hotplug_memory_arch(u64 base, u64 size) int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base, phys_addr_t size, bool nomap) { - if (nomap) - return memblock_remove(base, size); + if (nomap) { + /* + * If the memory is already reserved (by another region), we + * should not allow it to be marked nomap. + */ + if (memblock_is_region_reserved(base, size)) + return -EBUSY; + + return memblock_mark_nomap(base, size); + } return memblock_reserve(base, size); } diff --git a/drivers/opp/core.c b/drivers/opp/core.c index 8c905aabacc0..f4fb43816e59 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -1335,7 +1335,11 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table, mutex_lock(&opp_table->lock); list_for_each_entry(temp, &opp_table->opp_list, node) { - if (dynamic == temp->dynamic) { + /* + * Refcount must be dropped only once for each OPP by OPP core, + * do that with help of "removed" flag. + */ + if (!temp->removed && dynamic == temp->dynamic) { opp = temp; break; } @@ -1345,10 +1349,27 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table, return opp; } -bool _opp_remove_all_static(struct opp_table *opp_table) +/* + * Can't call dev_pm_opp_put() from under the lock as debugfs removal needs to + * happen lock less to avoid circular dependency issues. This routine must be + * called without the opp_table->lock held. + */ +static void _opp_remove_all(struct opp_table *opp_table, bool dynamic) { struct dev_pm_opp *opp; + while ((opp = _opp_get_next(opp_table, dynamic))) { + opp->removed = true; + dev_pm_opp_put(opp); + + /* Drop the references taken by dev_pm_opp_add() */ + if (dynamic) + dev_pm_opp_put_opp_table(opp_table); + } +} + +bool _opp_remove_all_static(struct opp_table *opp_table) +{ mutex_lock(&opp_table->lock); if (!opp_table->parsed_static_opps) { @@ -1363,13 +1384,7 @@ bool _opp_remove_all_static(struct opp_table *opp_table) mutex_unlock(&opp_table->lock); - /* - * Can't remove the OPP from under the lock, debugfs removal needs to - * happen lock less to avoid circular dependency issues. - */ - while ((opp = _opp_get_next(opp_table, false))) - dev_pm_opp_put(opp); - + _opp_remove_all(opp_table, false); return true; } @@ -1382,25 +1397,12 @@ bool _opp_remove_all_static(struct opp_table *opp_table) void dev_pm_opp_remove_all_dynamic(struct device *dev) { struct opp_table *opp_table; - struct dev_pm_opp *opp; - int count = 0; opp_table = _find_opp_table(dev); if (IS_ERR(opp_table)) return; - /* - * Can't remove the OPP from under the lock, debugfs removal needs to - * happen lock less to avoid circular dependency issues. - */ - while ((opp = _opp_get_next(opp_table, true))) { - dev_pm_opp_put(opp); - count++; - } - - /* Drop the references taken by dev_pm_opp_add() */ - while (count--) - dev_pm_opp_put_opp_table(opp_table); + _opp_remove_all(opp_table, true); /* Drop the reference taken by _find_opp_table() */ dev_pm_opp_put_opp_table(opp_table); diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 03cb387236c4..d0c0336be39b 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -755,7 +755,6 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, struct device *dev, struct device_node *np) { struct dev_pm_opp *new_opp; - u64 rate = 0; u32 val; int ret; bool rate_not_available = false; @@ -772,7 +771,8 @@ static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table, /* Check if the OPP supports hardware's hierarchy of versions or not */ if (!_opp_is_supported(dev, opp_table, np)) { - dev_dbg(dev, "OPP not supported by hardware: %llu\n", rate); + dev_dbg(dev, "OPP not supported by hardware: %lu\n", + new_opp->rate); goto free_opp; } diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index 4ced7ffa8158..8dca37662dd8 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -56,6 +56,7 @@ extern struct list_head opp_tables; * @dynamic: not-created from static DT entries. * @turbo: true if turbo (boost) OPP * @suspend: true if suspend OPP + * @removed: flag indicating that OPP's reference is dropped by OPP core. * @pstate: Device's power domain's performance state. * @rate: Frequency in hertz * @level: Performance level @@ -78,6 +79,7 @@ struct dev_pm_opp { bool dynamic; bool turbo; bool suspend; + bool removed; unsigned int pstate; unsigned long rate; unsigned int level; diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index dac1ac8a7615..849f1e416ea5 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -64,6 +64,7 @@ enum j721e_pcie_mode { struct j721e_pcie_data { enum j721e_pcie_mode mode; + bool quirk_retrain_flag; }; static inline u32 j721e_pcie_user_readl(struct j721e_pcie *pcie, u32 offset) @@ -280,6 +281,7 @@ static struct pci_ops cdns_ti_pcie_host_ops = { static const struct j721e_pcie_data j721e_pcie_rc_data = { .mode = PCI_MODE_RC, + .quirk_retrain_flag = true, }; static const struct j721e_pcie_data j721e_pcie_ep_data = { @@ -388,6 +390,7 @@ static int j721e_pcie_probe(struct platform_device *pdev) bridge->ops = &cdns_ti_pcie_host_ops; rc = pci_host_bridge_priv(bridge); + rc->quirk_retrain_flag = data->quirk_retrain_flag; cdns_pcie = &rc->pcie; cdns_pcie->dev = dev; diff --git a/drivers/pci/controller/cadence/pcie-cadence-host.c b/drivers/pci/controller/cadence/pcie-cadence-host.c index 811c1cb2e8de..73dcf8cf98fb 100644 --- a/drivers/pci/controller/cadence/pcie-cadence-host.c +++ b/drivers/pci/controller/cadence/pcie-cadence-host.c @@ -77,6 +77,68 @@ static struct pci_ops cdns_pcie_host_ops = { .write = pci_generic_config_write, }; +static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie) +{ + struct device *dev = pcie->dev; + int retries; + + /* Check if the link is up or not */ + for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { + if (cdns_pcie_link_up(pcie)) { + dev_info(dev, "Link up\n"); + return 0; + } + usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); + } + + return -ETIMEDOUT; +} + +static int cdns_pcie_retrain(struct cdns_pcie *pcie) +{ + u32 lnk_cap_sls, pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET; + u16 lnk_stat, lnk_ctl; + int ret = 0; + + /* + * Set retrain bit if current speed is 2.5 GB/s, + * but the PCIe root port support is > 2.5 GB/s. + */ + + lnk_cap_sls = cdns_pcie_readl(pcie, (CDNS_PCIE_RP_BASE + pcie_cap_off + + PCI_EXP_LNKCAP)); + if ((lnk_cap_sls & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB) + return ret; + + lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA); + if ((lnk_stat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) { + lnk_ctl = cdns_pcie_rp_readw(pcie, + pcie_cap_off + PCI_EXP_LNKCTL); + lnk_ctl |= PCI_EXP_LNKCTL_RL; + cdns_pcie_rp_writew(pcie, pcie_cap_off + PCI_EXP_LNKCTL, + lnk_ctl); + + ret = cdns_pcie_host_wait_for_link(pcie); + } + return ret; +} + +static int cdns_pcie_host_start_link(struct cdns_pcie_rc *rc) +{ + struct cdns_pcie *pcie = &rc->pcie; + int ret; + + ret = cdns_pcie_host_wait_for_link(pcie); + + /* + * Retrain link for Gen2 training defect + * if quirk flag is set. + */ + if (!ret && rc->quirk_retrain_flag) + ret = cdns_pcie_retrain(pcie); + + return ret; +} static int cdns_pcie_host_init_root_port(struct cdns_pcie_rc *rc) { @@ -321,9 +383,10 @@ static int cdns_pcie_host_map_dma_ranges(struct cdns_pcie_rc *rc) resource_list_for_each_entry(entry, &bridge->dma_ranges) { err = cdns_pcie_host_bar_config(rc, entry); - if (err) + if (err) { dev_err(dev, "Fail to configure IB using dma-ranges\n"); - return err; + return err; + } } return 0; @@ -398,23 +461,6 @@ static int cdns_pcie_host_init(struct device *dev, return cdns_pcie_host_init_address_translation(rc); } -static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie) -{ - struct device *dev = pcie->dev; - int retries; - - /* Check if the link is up or not */ - for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) { - if (cdns_pcie_link_up(pcie)) { - dev_info(dev, "Link up\n"); - return 0; - } - usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX); - } - - return -ETIMEDOUT; -} - int cdns_pcie_host_setup(struct cdns_pcie_rc *rc) { struct device *dev = rc->pcie.dev; @@ -457,7 +503,7 @@ int cdns_pcie_host_setup(struct cdns_pcie_rc *rc) return ret; } - ret = cdns_pcie_host_wait_for_link(pcie); + ret = cdns_pcie_host_start_link(rc); if (ret) dev_dbg(dev, "PCIe link never came up\n"); diff --git a/drivers/pci/controller/cadence/pcie-cadence.h b/drivers/pci/controller/cadence/pcie-cadence.h index 30eba6cafe2c..254d2570f8c9 100644 --- a/drivers/pci/controller/cadence/pcie-cadence.h +++ b/drivers/pci/controller/cadence/pcie-cadence.h @@ -119,7 +119,7 @@ * Root Port Registers (PCI configuration space for the root port function) */ #define CDNS_PCIE_RP_BASE 0x00200000 - +#define CDNS_PCIE_RP_CAP_OFFSET 0xc0 /* * Address Translation Registers @@ -291,6 +291,7 @@ struct cdns_pcie { * @device_id: PCI device ID * @avail_ib_bar: Satus of RP_BAR0, RP_BAR1 and RP_NO_BAR if it's free or * available + * @quirk_retrain_flag: Retrain link as quirk for PCIe Gen2 */ struct cdns_pcie_rc { struct cdns_pcie pcie; @@ -299,6 +300,7 @@ struct cdns_pcie_rc { u32 vendor_id; u32 device_id; bool avail_ib_bar[CDNS_PCIE_RP_MAX_IB]; + bool quirk_retrain_flag; }; /** @@ -414,6 +416,13 @@ static inline void cdns_pcie_rp_writew(struct cdns_pcie *pcie, cdns_pcie_write_sz(addr, 0x2, value); } +static inline u16 cdns_pcie_rp_readw(struct cdns_pcie *pcie, u32 reg) +{ + void __iomem *addr = pcie->reg_base + CDNS_PCIE_RP_BASE + reg; + + return cdns_pcie_read_sz(addr, 0x2); +} + /* Endpoint Function register access */ static inline void cdns_pcie_ep_fn_writeb(struct cdns_pcie *pcie, u8 fn, u32 reg, u8 value) diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index affa2713bf80..0d605a0d69e3 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -398,7 +398,9 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie) /* enable external reference clock */ val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK); - val &= ~PHY_REFCLK_USE_PAD; + /* USE_PAD is required only for ipq806x */ + if (!of_device_is_compatible(node, "qcom,pcie-apq8064")) + val &= ~PHY_REFCLK_USE_PAD; val |= PHY_REFCLK_SSP_EN; writel(val, pcie->parf + PCIE20_PARF_PHY_REFCLK); diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index 2470782cb01a..1c34c897a7e2 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -384,13 +384,9 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu) if (!msi_group->gic_irq) continue; - irq_set_chained_handler(msi_group->gic_irq, - xgene_msi_isr); - err = irq_set_handler_data(msi_group->gic_irq, msi_group); - if (err) { - pr_err("failed to register GIC IRQ handler\n"); - return -EINVAL; - } + irq_set_chained_handler_and_data(msi_group->gic_irq, + xgene_msi_isr, msi_group); + /* * Statically allocate MSI GIC IRQs to each CPU core. * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated diff --git a/drivers/pci/controller/pcie-mediatek.c b/drivers/pci/controller/pcie-mediatek.c index cf4c18f0c25a..23548b517e4b 100644 --- a/drivers/pci/controller/pcie-mediatek.c +++ b/drivers/pci/controller/pcie-mediatek.c @@ -1035,14 +1035,14 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie) err = of_pci_get_devfn(child); if (err < 0) { dev_err(dev, "failed to parse devfn: %d\n", err); - return err; + goto error_put_node; } slot = PCI_SLOT(err); err = mtk_pcie_parse_port(pcie, child, slot); if (err) - return err; + goto error_put_node; } err = mtk_pcie_subsys_powerup(pcie); @@ -1058,6 +1058,9 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie) mtk_pcie_subsys_powerdown(pcie); return 0; +error_put_node: + of_node_put(child); + return err; } static int mtk_pcie_probe(struct platform_device *pdev) diff --git a/drivers/pci/controller/pcie-rcar-host.c b/drivers/pci/controller/pcie-rcar-host.c index 4d1c4b24e537..a728e8f9ad3c 100644 --- a/drivers/pci/controller/pcie-rcar-host.c +++ b/drivers/pci/controller/pcie-rcar-host.c @@ -735,7 +735,7 @@ static int rcar_pcie_enable_msi(struct rcar_pcie_host *host) } /* setup MSI data target */ - msi->pages = __get_free_pages(GFP_KERNEL, 0); + msi->pages = __get_free_pages(GFP_KERNEL | GFP_DMA32, 0); rcar_pcie_hw_enable_msi(host); return 0; diff --git a/drivers/pci/controller/pcie-rockchip.c b/drivers/pci/controller/pcie-rockchip.c index 904dec0d3a88..990a00e08bc5 100644 --- a/drivers/pci/controller/pcie-rockchip.c +++ b/drivers/pci/controller/pcie-rockchip.c @@ -82,7 +82,7 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) } rockchip->mgmt_sticky_rst = devm_reset_control_get_exclusive(dev, - "mgmt-sticky"); + "mgmt-sticky"); if (IS_ERR(rockchip->mgmt_sticky_rst)) { if (PTR_ERR(rockchip->mgmt_sticky_rst) != -EPROBE_DEFER) dev_err(dev, "missing mgmt-sticky reset property in node\n"); @@ -118,11 +118,11 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip) } if (rockchip->is_rc) { - rockchip->ep_gpio = devm_gpiod_get(dev, "ep", GPIOD_OUT_HIGH); - if (IS_ERR(rockchip->ep_gpio)) { - dev_err(dev, "missing ep-gpios property in node\n"); - return PTR_ERR(rockchip->ep_gpio); - } + rockchip->ep_gpio = devm_gpiod_get_optional(dev, "ep", + GPIOD_OUT_HIGH); + if (IS_ERR(rockchip->ep_gpio)) + return dev_err_probe(dev, PTR_ERR(rockchip->ep_gpio), + "failed to get ep GPIO\n"); } rockchip->aclk_pcie = devm_clk_get(dev, "aclk"); diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c index f92e0152e65e..67937facd90c 100644 --- a/drivers/pci/controller/pcie-xilinx-cpm.c +++ b/drivers/pci/controller/pcie-xilinx-cpm.c @@ -404,6 +404,7 @@ static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie_port *port) return 0; out: xilinx_cpm_free_irq_domains(port); + of_node_put(pcie_intc_node); dev_err(dev, "Failed to allocate IRQ domains\n"); return -ENOMEM; diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index cdbfa5df3a51..dbfa0b55d31a 100644 --- a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -34,12 +34,11 @@ static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_add_slot(drc_name); if (rc) @@ -65,12 +64,11 @@ static ssize_t remove_slot_store(struct kobject *kobj, if (nbytes >= MAX_DRC_NAME_LEN) return 0; - memcpy(drc_name, buf, nbytes); + strscpy(drc_name, buf, nbytes + 1); end = strchr(drc_name, '\n'); - if (!end) - end = &drc_name[nbytes]; - *end = '\0'; + if (end) + *end = '\0'; rc = dlpar_remove_slot(drc_name); if (rc) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index c9e790c74051..a047c421debe 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -93,8 +93,9 @@ static int disable_slot(struct hotplug_slot *hotplug_slot) pci_dev_put(pdev); return -EBUSY; } + pci_dev_put(pdev); - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); rc = zpci_disable_device(zdev); if (rc) diff --git a/drivers/pci/pci-bridge-emul.c b/drivers/pci/pci-bridge-emul.c index 139869d50eb2..fdaf86a888b7 100644 --- a/drivers/pci/pci-bridge-emul.c +++ b/drivers/pci/pci-bridge-emul.c @@ -21,8 +21,9 @@ #include "pci-bridge-emul.h" #define PCI_BRIDGE_CONF_END PCI_STD_HEADER_SIZEOF +#define PCI_CAP_PCIE_SIZEOF (PCI_EXP_SLTSTA2 + 2) #define PCI_CAP_PCIE_START PCI_BRIDGE_CONF_END -#define PCI_CAP_PCIE_END (PCI_CAP_PCIE_START + PCI_EXP_SLTSTA2 + 2) +#define PCI_CAP_PCIE_END (PCI_CAP_PCIE_START + PCI_CAP_PCIE_SIZEOF) /** * struct pci_bridge_reg_behavior - register bits behaviors @@ -46,7 +47,8 @@ struct pci_bridge_reg_behavior { u32 w1c; }; -static const struct pci_bridge_reg_behavior pci_regs_behavior[] = { +static const +struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = { [PCI_VENDOR_ID / 4] = { .ro = ~0 }, [PCI_COMMAND / 4] = { .rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY | @@ -164,7 +166,8 @@ static const struct pci_bridge_reg_behavior pci_regs_behavior[] = { }, }; -static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = { +static const +struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = { [PCI_CAP_LIST_ID / 4] = { /* * Capability ID, Next Capability Pointer and @@ -260,6 +263,8 @@ static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = { int pci_bridge_emul_init(struct pci_bridge_emul *bridge, unsigned int flags) { + BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END); + bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16); bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE; bridge->conf.cache_line_size = 0x10; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 790393d1e318..9449dfde2841 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -3596,7 +3596,14 @@ u32 pci_rebar_get_possible_sizes(struct pci_dev *pdev, int bar) return 0; pci_read_config_dword(pdev, pos + PCI_REBAR_CAP, &cap); - return (cap & PCI_REBAR_CAP_SIZES) >> 4; + cap &= PCI_REBAR_CAP_SIZES; + + /* Sapphire RX 5600 XT Pulse has an invalid cap dword for BAR 0 */ + if (pdev->vendor == PCI_VENDOR_ID_ATI && pdev->device == 0x731f && + bar == 0 && cap == 0x7000) + cap = 0x3f000; + + return cap >> 4; } /** @@ -4022,6 +4029,10 @@ int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr, ret = logic_pio_register_range(range); if (ret) kfree(range); + + /* Ignore duplicates due to deferred probing */ + if (ret == -EEXIST) + ret = 0; #endif return ret; diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 3946555a6042..45a2ef702b45 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -133,14 +133,6 @@ config PCIE_PTM This is only useful if you have devices that support PTM, but it is safe to enable even if you don't. -config PCIE_BW - bool "PCI Express Bandwidth Change Notification" - depends on PCIEPORTBUS - help - This enables PCI Express Bandwidth Change Notification. If - you know link width or rate changes occur only to correct - unreliable links, you may answer Y. - config PCIE_EDR bool "PCI Express Error Disconnect Recover support" depends on PCIE_DPC && ACPI diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index d9697892fa3e..b2980db88cc0 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -12,5 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT) += aer_inject.o obj-$(CONFIG_PCIE_PME) += pme.o obj-$(CONFIG_PCIE_DPC) += dpc.o obj-$(CONFIG_PCIE_PTM) += ptm.o -obj-$(CONFIG_PCIE_BW) += bw_notification.o obj-$(CONFIG_PCIE_EDR) += edr.o diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c deleted file mode 100644 index 565d23cccb8b..000000000000 --- a/drivers/pci/pcie/bw_notification.c +++ /dev/null @@ -1,138 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * PCI Express Link Bandwidth Notification services driver - * Author: Alexandru Gagniuc - * - * Copyright (C) 2019, Dell Inc - * - * The PCIe Link Bandwidth Notification provides a way to notify the - * operating system when the link width or data rate changes. This - * capability is required for all root ports and downstream ports - * supporting links wider than x1 and/or multiple link speeds. - * - * This service port driver hooks into the bandwidth notification interrupt - * and warns when links become degraded in operation. - */ - -#define dev_fmt(fmt) "bw_notification: " fmt - -#include "../pci.h" -#include "portdrv.h" - -static bool pcie_link_bandwidth_notification_supported(struct pci_dev *dev) -{ - int ret; - u32 lnk_cap; - - ret = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnk_cap); - return (ret == PCIBIOS_SUCCESSFUL) && (lnk_cap & PCI_EXP_LNKCAP_LBNC); -} - -static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev) -{ - u16 lnk_ctl; - - pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); - - pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl); - lnk_ctl |= PCI_EXP_LNKCTL_LBMIE; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl); -} - -static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev) -{ - u16 lnk_ctl; - - pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl); - lnk_ctl &= ~PCI_EXP_LNKCTL_LBMIE; - pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl); -} - -static irqreturn_t pcie_bw_notification_irq(int irq, void *context) -{ - struct pcie_device *srv = context; - struct pci_dev *port = srv->port; - u16 link_status, events; - int ret; - - ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status); - events = link_status & PCI_EXP_LNKSTA_LBMS; - - if (ret != PCIBIOS_SUCCESSFUL || !events) - return IRQ_NONE; - - pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); - pcie_update_link_speed(port->subordinate, link_status); - return IRQ_WAKE_THREAD; -} - -static irqreturn_t pcie_bw_notification_handler(int irq, void *context) -{ - struct pcie_device *srv = context; - struct pci_dev *port = srv->port; - struct pci_dev *dev; - - /* - * Print status from downstream devices, not this root port or - * downstream switch port. - */ - down_read(&pci_bus_sem); - list_for_each_entry(dev, &port->subordinate->devices, bus_list) - pcie_report_downtraining(dev); - up_read(&pci_bus_sem); - - return IRQ_HANDLED; -} - -static int pcie_bandwidth_notification_probe(struct pcie_device *srv) -{ - int ret; - - /* Single-width or single-speed ports do not have to support this. */ - if (!pcie_link_bandwidth_notification_supported(srv->port)) - return -ENODEV; - - ret = request_threaded_irq(srv->irq, pcie_bw_notification_irq, - pcie_bw_notification_handler, - IRQF_SHARED, "PCIe BW notif", srv); - if (ret) - return ret; - - pcie_enable_link_bandwidth_notification(srv->port); - pci_info(srv->port, "enabled with IRQ %d\n", srv->irq); - - return 0; -} - -static void pcie_bandwidth_notification_remove(struct pcie_device *srv) -{ - pcie_disable_link_bandwidth_notification(srv->port); - free_irq(srv->irq, srv); -} - -static int pcie_bandwidth_notification_suspend(struct pcie_device *srv) -{ - pcie_disable_link_bandwidth_notification(srv->port); - return 0; -} - -static int pcie_bandwidth_notification_resume(struct pcie_device *srv) -{ - pcie_enable_link_bandwidth_notification(srv->port); - return 0; -} - -static struct pcie_port_service_driver pcie_bandwidth_notification_driver = { - .name = "pcie_bw_notification", - .port_type = PCIE_ANY_PORT, - .service = PCIE_PORT_SERVICE_BWNOTIF, - .probe = pcie_bandwidth_notification_probe, - .suspend = pcie_bandwidth_notification_suspend, - .resume = pcie_bandwidth_notification_resume, - .remove = pcie_bandwidth_notification_remove, -}; - -int __init pcie_bandwidth_notification_init(void) -{ - return pcie_port_service_register(&pcie_bandwidth_notification_driver); -} diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 510f31f0ef6d..4798bd6de97d 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -198,8 +198,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); - status = reset_subordinates(bridge); - if (status != PCI_ERS_RESULT_RECOVERED) { + if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index af7cf237432a..2ff5724b8f13 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -53,12 +53,6 @@ int pcie_dpc_init(void); static inline int pcie_dpc_init(void) { return 0; } #endif -#ifdef CONFIG_PCIE_BW -int pcie_bandwidth_notification_init(void); -#else -static inline int pcie_bandwidth_notification_init(void) { return 0; } -#endif - /* Port Type */ #define PCIE_ANY_PORT (~0) diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 0b250bc5f405..8bd4992a4f32 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -255,7 +255,6 @@ static void __init pcie_init_services(void) pcie_pme_init(); pcie_dpc_init(); pcie_hp_init(); - pcie_bandwidth_notification_init(); } static int __init pcie_portdrv_init(void) diff --git a/drivers/pci/setup-res.c b/drivers/pci/setup-res.c index 43eda101fcf4..7f1acb3918d0 100644 --- a/drivers/pci/setup-res.c +++ b/drivers/pci/setup-res.c @@ -410,10 +410,16 @@ EXPORT_SYMBOL(pci_release_resource); int pci_resize_resource(struct pci_dev *dev, int resno, int size) { struct resource *res = dev->resource + resno; + struct pci_host_bridge *host; int old, ret; u32 sizes; u16 cmd; + /* Check if we must preserve the firmware's resource assignment */ + host = pci_find_host_bridge(dev->bus); + if (host->preserve_config) + return -ENOTSUPP; + /* Make sure the resource isn't assigned before resizing it. */ if (!(res->flags & IORESOURCE_UNSET)) return -EBUSY; diff --git a/drivers/pci/syscall.c b/drivers/pci/syscall.c index 31e39558d49d..8b003c890b87 100644 --- a/drivers/pci/syscall.c +++ b/drivers/pci/syscall.c @@ -20,7 +20,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, u16 word; u32 dword; long err; - long cfg_ret; + int cfg_ret; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -46,7 +46,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn, } err = -EIO; - if (cfg_ret != PCIBIOS_SUCCESSFUL) + if (cfg_ret) goto error; switch (len) { @@ -105,7 +105,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, if (err) break; err = pci_user_write_config_byte(dev, off, byte); - if (err != PCIBIOS_SUCCESSFUL) + if (err) err = -EIO; break; @@ -114,7 +114,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, if (err) break; err = pci_user_write_config_word(dev, off, word); - if (err != PCIBIOS_SUCCESSFUL) + if (err) err = -EIO; break; @@ -123,7 +123,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, if (err) break; err = pci_user_write_config_dword(dev, off, dword); - if (err != PCIBIOS_SUCCESSFUL) + if (err) err = -EIO; break; diff --git a/drivers/perf/arm-cmn.c b/drivers/perf/arm-cmn.c index a76ff594f3ca..46defb1dcf86 100644 --- a/drivers/perf/arm-cmn.c +++ b/drivers/perf/arm-cmn.c @@ -1150,7 +1150,7 @@ static int arm_cmn_commit_txn(struct pmu *pmu) static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) { struct arm_cmn *cmn; - unsigned int target; + unsigned int i, target; cmn = hlist_entry_safe(node, struct arm_cmn, cpuhp_node); if (cpu != cmn->cpu) @@ -1161,6 +1161,8 @@ static int arm_cmn_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node) return 0; perf_pmu_migrate_context(&cmn->pmu, cpu, target); + for (i = 0; i < cmn->num_dtcs; i++) + irq_set_affinity_hint(cmn->dtc[i].irq, cpumask_of(target)); cmn->cpu = target; return 0; } @@ -1502,7 +1504,7 @@ static int arm_cmn_probe(struct platform_device *pdev) struct arm_cmn *cmn; const char *name; static atomic_t id; - int err, rootnode, this_id; + int err, rootnode; cmn = devm_kzalloc(&pdev->dev, sizeof(*cmn), GFP_KERNEL); if (!cmn) @@ -1549,14 +1551,9 @@ static int arm_cmn_probe(struct platform_device *pdev) .cancel_txn = arm_cmn_end_txn, }; - this_id = atomic_fetch_inc(&id); - if (this_id == 0) { - name = "arm_cmn"; - } else { - name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", this_id); - if (!name) - return -ENOMEM; - } + name = devm_kasprintf(cmn->dev, GFP_KERNEL, "arm_cmn_%d", atomic_fetch_inc(&id)); + if (!name) + return -ENOMEM; err = cpuhp_state_add_instance(arm_cmn_hp_state, &cmn->cpuhp_node); if (err) diff --git a/drivers/perf/arm_dmc620_pmu.c b/drivers/perf/arm_dmc620_pmu.c index 004930eb4bbb..b50b47f1a0d9 100644 --- a/drivers/perf/arm_dmc620_pmu.c +++ b/drivers/perf/arm_dmc620_pmu.c @@ -681,6 +681,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev) if (!name) { dev_err(&pdev->dev, "Create name failed, PMU @%pa\n", &res->start); + ret = -ENOMEM; goto out_teardown_dev; } diff --git a/drivers/phy/Kconfig b/drivers/phy/Kconfig index 00dabe5fab8a..68d9c2f6a5ca 100644 --- a/drivers/phy/Kconfig +++ b/drivers/phy/Kconfig @@ -52,6 +52,7 @@ config PHY_XGENE config USB_LGM_PHY tristate "INTEL Lightning Mountain USB PHY Driver" depends on USB_SUPPORT + depends on X86 || COMPILE_TEST select USB_PHY select REGULATOR select REGULATOR_FIXED_VOLTAGE diff --git a/drivers/phy/cadence/phy-cadence-torrent.c b/drivers/phy/cadence/phy-cadence-torrent.c index f310e15d94cb..591a15834b48 100644 --- a/drivers/phy/cadence/phy-cadence-torrent.c +++ b/drivers/phy/cadence/phy-cadence-torrent.c @@ -2298,6 +2298,7 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev) if (total_num_lanes > MAX_NUM_LANES) { dev_err(dev, "Invalid lane configuration\n"); + ret = -EINVAL; goto put_lnk_rst; } diff --git a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c index a7d126192cf1..29d246ea24b4 100644 --- a/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c +++ b/drivers/phy/lantiq/phy-lantiq-rcu-usb2.c @@ -124,8 +124,16 @@ static int ltq_rcu_usb2_phy_power_on(struct phy *phy) reset_control_deassert(priv->phy_reset); ret = clk_prepare_enable(priv->phy_gate_clk); - if (ret) + if (ret) { dev_err(dev, "failed to enable PHY gate\n"); + return ret; + } + + /* + * at least the xrx200 usb2 phy requires some extra time to be + * operational after enabling the clock + */ + usleep_range(100, 200); return ret; } diff --git a/drivers/phy/mediatek/phy-mtk-hdmi.c b/drivers/phy/mediatek/phy-mtk-hdmi.c index 45be8aa724f3..8313bd517e4c 100644 --- a/drivers/phy/mediatek/phy-mtk-hdmi.c +++ b/drivers/phy/mediatek/phy-mtk-hdmi.c @@ -201,6 +201,7 @@ static const struct of_device_id mtk_hdmi_phy_match[] = { }, {}, }; +MODULE_DEVICE_TABLE(of, mtk_hdmi_phy_match); static struct platform_driver mtk_hdmi_phy_driver = { .probe = mtk_hdmi_phy_probe, diff --git a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c index 18c481251f04..9c7815bb9000 100644 --- a/drivers/phy/mediatek/phy-mtk-mipi-dsi.c +++ b/drivers/phy/mediatek/phy-mtk-mipi-dsi.c @@ -233,6 +233,7 @@ static const struct of_device_id mtk_mipi_tx_match[] = { .data = &mt8183_mipitx_data }, { }, }; +MODULE_DEVICE_TABLE(of, mtk_mipi_tx_match); struct platform_driver mtk_mipi_tx_driver = { .probe = mtk_mipi_tx_probe, diff --git a/drivers/phy/rockchip/phy-rockchip-emmc.c b/drivers/phy/rockchip/phy-rockchip-emmc.c index 1e424f263e7a..496d199852af 100644 --- a/drivers/phy/rockchip/phy-rockchip-emmc.c +++ b/drivers/phy/rockchip/phy-rockchip-emmc.c @@ -248,15 +248,17 @@ static int rockchip_emmc_phy_init(struct phy *phy) * - SDHCI driver to get the PHY * - SDHCI driver to init the PHY * - * The clock is optional, so upon any error we just set to NULL. + * The clock is optional, using clk_get_optional() to get the clock + * and do error processing if the return value != NULL * * NOTE: we don't do anything special for EPROBE_DEFER here. Given the * above expected use case, EPROBE_DEFER isn't sensible to expect, so * it's just like any other error. */ - rk_phy->emmcclk = clk_get(&phy->dev, "emmcclk"); + rk_phy->emmcclk = clk_get_optional(&phy->dev, "emmcclk"); if (IS_ERR(rk_phy->emmcclk)) { - dev_dbg(&phy->dev, "Error getting emmcclk: %d\n", ret); + ret = PTR_ERR(rk_phy->emmcclk); + dev_err(&phy->dev, "Error getting emmcclk: %d\n", ret); rk_phy->emmcclk = NULL; } diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index 7c92a6e22d75..aa7f7aa77297 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -526,11 +526,13 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev) * power), not wake up. */ ec_dev->host_event_wake_mask = U32_MAX & - ~(BIT(EC_HOST_EVENT_AC_DISCONNECTED) | - BIT(EC_HOST_EVENT_BATTERY_LOW) | - BIT(EC_HOST_EVENT_BATTERY_CRITICAL) | - BIT(EC_HOST_EVENT_PD_MCU) | - BIT(EC_HOST_EVENT_BATTERY_STATUS)); + ~(EC_HOST_EVENT_MASK(EC_HOST_EVENT_LID_CLOSED) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_AC_DISCONNECTED) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_BATTERY_LOW) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_BATTERY_CRITICAL) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_BATTERY) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU) | + EC_HOST_EVENT_MASK(EC_HOST_EVENT_BATTERY_STATUS)); /* * Old ECs may not support this command. Complain about all * other errors. diff --git a/drivers/platform/olpc/olpc-ec.c b/drivers/platform/olpc/olpc-ec.c index f64b82824db2..2db7113383fd 100644 --- a/drivers/platform/olpc/olpc-ec.c +++ b/drivers/platform/olpc/olpc-ec.c @@ -426,11 +426,8 @@ static int olpc_ec_probe(struct platform_device *pdev) /* get the EC revision */ err = olpc_ec_cmd(EC_FIRMWARE_REV, NULL, 0, &ec->version, 1); - if (err) { - ec_priv = NULL; - kfree(ec); - return err; - } + if (err) + goto error; config.dev = pdev->dev.parent; config.driver_data = ec; @@ -440,12 +437,16 @@ static int olpc_ec_probe(struct platform_device *pdev) if (IS_ERR(ec->dcon_rdev)) { dev_err(&pdev->dev, "failed to register DCON regulator\n"); err = PTR_ERR(ec->dcon_rdev); - kfree(ec); - return err; + goto error; } ec->dbgfs_dir = olpc_ec_setup_debugfs(); + return 0; + +error: + ec_priv = NULL; + kfree(ec); return err; } diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 91e6176cdfbd..ac4125ec0660 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1369,7 +1369,7 @@ config INTEL_PMC_CORE - MPHY/PLL gating status (Sunrisepoint PCH only) config INTEL_PMT_CLASS - tristate "Intel Platform Monitoring Technology (PMT) Class driver" + tristate help The Intel Platform Monitoring Technology (PMT) class driver provides the basic sysfs interface and file hierarchy uses by PMT devices. @@ -1382,6 +1382,7 @@ config INTEL_PMT_CLASS config INTEL_PMT_TELEMETRY tristate "Intel Platform Monitoring Technology (PMT) Telemetry driver" + depends on MFD_INTEL_PMT select INTEL_PMT_CLASS help The Intel Platform Monitory Technology (PMT) Telemetry driver provides @@ -1393,6 +1394,7 @@ config INTEL_PMT_TELEMETRY config INTEL_PMT_CRASHLOG tristate "Intel Platform Monitoring Technology (PMT) Crashlog driver" + depends on MFD_INTEL_PMT select INTEL_PMT_CLASS help The Intel Platform Monitoring Technology (PMT) crashlog driver provides diff --git a/drivers/platform/x86/amd-pmc.c b/drivers/platform/x86/amd-pmc.c index ef8342572463..b9da58ee9b1e 100644 --- a/drivers/platform/x86/amd-pmc.c +++ b/drivers/platform/x86/amd-pmc.c @@ -210,31 +210,39 @@ static int amd_pmc_probe(struct platform_device *pdev) dev->dev = &pdev->dev; rdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); - if (!rdev || !pci_match_id(pmc_pci_ids, rdev)) + if (!rdev || !pci_match_id(pmc_pci_ids, rdev)) { + pci_dev_put(rdev); return -ENODEV; + } dev->cpu_id = rdev->device; err = pci_write_config_dword(rdev, AMD_PMC_SMU_INDEX_ADDRESS, AMD_PMC_BASE_ADDR_LO); if (err) { dev_err(dev->dev, "error writing to 0x%x\n", AMD_PMC_SMU_INDEX_ADDRESS); + pci_dev_put(rdev); return pcibios_err_to_errno(err); } err = pci_read_config_dword(rdev, AMD_PMC_SMU_INDEX_DATA, &val); - if (err) + if (err) { + pci_dev_put(rdev); return pcibios_err_to_errno(err); + } base_addr_lo = val & AMD_PMC_BASE_ADDR_HI_MASK; err = pci_write_config_dword(rdev, AMD_PMC_SMU_INDEX_ADDRESS, AMD_PMC_BASE_ADDR_HI); if (err) { dev_err(dev->dev, "error writing to 0x%x\n", AMD_PMC_SMU_INDEX_ADDRESS); + pci_dev_put(rdev); return pcibios_err_to_errno(err); } err = pci_read_config_dword(rdev, AMD_PMC_SMU_INDEX_DATA, &val); - if (err) + if (err) { + pci_dev_put(rdev); return pcibios_err_to_errno(err); + } base_addr_hi = val & AMD_PMC_BASE_ADDR_LO_MASK; pci_dev_put(rdev); diff --git a/drivers/platform/x86/dell-wmi-sysman/enum-attributes.c b/drivers/platform/x86/dell-wmi-sysman/enum-attributes.c index 80f4b7785c6c..091e48c217ed 100644 --- a/drivers/platform/x86/dell-wmi-sysman/enum-attributes.c +++ b/drivers/platform/x86/dell-wmi-sysman/enum-attributes.c @@ -185,5 +185,8 @@ void exit_enum_attributes(void) sysfs_remove_group(wmi_priv.enumeration_data[instance_id].attr_name_kobj, &enumeration_attr_group); } + wmi_priv.enumeration_instances_count = 0; + kfree(wmi_priv.enumeration_data); + wmi_priv.enumeration_data = NULL; } diff --git a/drivers/platform/x86/dell-wmi-sysman/int-attributes.c b/drivers/platform/x86/dell-wmi-sysman/int-attributes.c index 75aedbb733be..8a49ba6e44f9 100644 --- a/drivers/platform/x86/dell-wmi-sysman/int-attributes.c +++ b/drivers/platform/x86/dell-wmi-sysman/int-attributes.c @@ -175,5 +175,8 @@ void exit_int_attributes(void) sysfs_remove_group(wmi_priv.integer_data[instance_id].attr_name_kobj, &integer_attr_group); } + wmi_priv.integer_instances_count = 0; + kfree(wmi_priv.integer_data); + wmi_priv.integer_data = NULL; } diff --git a/drivers/platform/x86/dell-wmi-sysman/passobj-attributes.c b/drivers/platform/x86/dell-wmi-sysman/passobj-attributes.c index 3abcd95477c0..834b3e82ad9f 100644 --- a/drivers/platform/x86/dell-wmi-sysman/passobj-attributes.c +++ b/drivers/platform/x86/dell-wmi-sysman/passobj-attributes.c @@ -183,5 +183,8 @@ void exit_po_attributes(void) sysfs_remove_group(wmi_priv.po_data[instance_id].attr_name_kobj, &po_attr_group); } + wmi_priv.po_instances_count = 0; + kfree(wmi_priv.po_data); + wmi_priv.po_data = NULL; } diff --git a/drivers/platform/x86/dell-wmi-sysman/string-attributes.c b/drivers/platform/x86/dell-wmi-sysman/string-attributes.c index ac75dce88a4c..552537852459 100644 --- a/drivers/platform/x86/dell-wmi-sysman/string-attributes.c +++ b/drivers/platform/x86/dell-wmi-sysman/string-attributes.c @@ -155,5 +155,8 @@ void exit_str_attributes(void) sysfs_remove_group(wmi_priv.str_data[instance_id].attr_name_kobj, &str_attr_group); } + wmi_priv.str_instances_count = 0; + kfree(wmi_priv.str_data); + wmi_priv.str_data = NULL; } diff --git a/drivers/platform/x86/dell-wmi-sysman/sysman.c b/drivers/platform/x86/dell-wmi-sysman/sysman.c index cb81010ba1a2..7410ccae650c 100644 --- a/drivers/platform/x86/dell-wmi-sysman/sysman.c +++ b/drivers/platform/x86/dell-wmi-sysman/sysman.c @@ -210,25 +210,17 @@ static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot); */ static int create_attributes_level_sysfs_files(void) { - int ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr); + int ret; - if (ret) { - pr_debug("could not create reset_bios file\n"); + ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr); + if (ret) return ret; - } ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr); - if (ret) { - pr_debug("could not create changing_pending_reboot file\n"); - sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr); - } - return ret; -} + if (ret) + return ret; -static void release_reset_bios_data(void) -{ - sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr); - sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr); + return 0; } static ssize_t wmi_sysman_attr_show(struct kobject *kobj, struct attribute *attr, @@ -373,8 +365,6 @@ static void destroy_attribute_objs(struct kset *kset) */ static void release_attributes_data(void) { - release_reset_bios_data(); - mutex_lock(&wmi_priv.mutex); exit_enum_attributes(); exit_int_attributes(); @@ -386,11 +376,13 @@ static void release_attributes_data(void) wmi_priv.authentication_dir_kset = NULL; } if (wmi_priv.main_dir_kset) { + sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr); + sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr); destroy_attribute_objs(wmi_priv.main_dir_kset); kset_unregister(wmi_priv.main_dir_kset); + wmi_priv.main_dir_kset = NULL; } mutex_unlock(&wmi_priv.mutex); - } /** @@ -497,7 +489,6 @@ static int init_bios_attributes(int attr_type, const char *guid) err_attr_init: mutex_unlock(&wmi_priv.mutex); - release_attributes_data(); kfree(obj); return retval; } @@ -513,102 +504,91 @@ static int __init sysman_init(void) } ret = init_bios_attr_set_interface(); - if (ret || !wmi_priv.bios_attr_wdev) { - pr_debug("failed to initialize set interface\n"); - goto fail_set_interface; - } + if (ret) + return ret; ret = init_bios_attr_pass_interface(); - if (ret || !wmi_priv.password_attr_wdev) { - pr_debug("failed to initialize pass interface\n"); - goto fail_pass_interface; + if (ret) + goto err_exit_bios_attr_set_interface; + + if (!wmi_priv.bios_attr_wdev || !wmi_priv.password_attr_wdev) { + pr_debug("failed to find set or pass interface\n"); + ret = -ENODEV; + goto err_exit_bios_attr_pass_interface; } ret = class_register(&firmware_attributes_class); if (ret) - goto fail_class; + goto err_exit_bios_attr_pass_interface; wmi_priv.class_dev = device_create(&firmware_attributes_class, NULL, MKDEV(0, 0), NULL, "%s", DRIVER_NAME); if (IS_ERR(wmi_priv.class_dev)) { ret = PTR_ERR(wmi_priv.class_dev); - goto fail_classdev; + goto err_unregister_class; } wmi_priv.main_dir_kset = kset_create_and_add("attributes", NULL, &wmi_priv.class_dev->kobj); if (!wmi_priv.main_dir_kset) { ret = -ENOMEM; - goto fail_main_kset; + goto err_destroy_classdev; } wmi_priv.authentication_dir_kset = kset_create_and_add("authentication", NULL, &wmi_priv.class_dev->kobj); if (!wmi_priv.authentication_dir_kset) { ret = -ENOMEM; - goto fail_authentication_kset; + goto err_release_attributes_data; } ret = create_attributes_level_sysfs_files(); if (ret) { pr_debug("could not create reset BIOS attribute\n"); - goto fail_reset_bios; + goto err_release_attributes_data; } ret = init_bios_attributes(ENUM, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID); if (ret) { pr_debug("failed to populate enumeration type attributes\n"); - goto fail_create_group; + goto err_release_attributes_data; } ret = init_bios_attributes(INT, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID); if (ret) { pr_debug("failed to populate integer type attributes\n"); - goto fail_create_group; + goto err_release_attributes_data; } ret = init_bios_attributes(STR, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID); if (ret) { pr_debug("failed to populate string type attributes\n"); - goto fail_create_group; + goto err_release_attributes_data; } ret = init_bios_attributes(PO, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID); if (ret) { pr_debug("failed to populate pass object type attributes\n"); - goto fail_create_group; + goto err_release_attributes_data; } return 0; -fail_create_group: +err_release_attributes_data: release_attributes_data(); -fail_reset_bios: - if (wmi_priv.authentication_dir_kset) { - kset_unregister(wmi_priv.authentication_dir_kset); - wmi_priv.authentication_dir_kset = NULL; - } - -fail_authentication_kset: - if (wmi_priv.main_dir_kset) { - kset_unregister(wmi_priv.main_dir_kset); - wmi_priv.main_dir_kset = NULL; - } - -fail_main_kset: +err_destroy_classdev: device_destroy(&firmware_attributes_class, MKDEV(0, 0)); -fail_classdev: +err_unregister_class: class_unregister(&firmware_attributes_class); -fail_class: +err_exit_bios_attr_pass_interface: exit_bios_attr_pass_interface(); -fail_pass_interface: +err_exit_bios_attr_set_interface: exit_bios_attr_set_interface(); -fail_set_interface: return ret; } diff --git a/drivers/platform/x86/intel-vbtn.c b/drivers/platform/x86/intel-vbtn.c index 30a9062d2b4b..a90c32d072da 100644 --- a/drivers/platform/x86/intel-vbtn.c +++ b/drivers/platform/x86/intel-vbtn.c @@ -47,8 +47,16 @@ static const struct key_entry intel_vbtn_keymap[] = { }; static const struct key_entry intel_vbtn_switchmap[] = { - { KE_SW, 0xCA, { .sw = { SW_DOCK, 1 } } }, /* Docked */ - { KE_SW, 0xCB, { .sw = { SW_DOCK, 0 } } }, /* Undocked */ + /* + * SW_DOCK should only be reported for docking stations, but DSDTs using the + * intel-vbtn code, always seem to use this for 2-in-1s / convertibles and set + * SW_DOCK=1 when in laptop-mode (in tandem with setting SW_TABLET_MODE=0). + * This causes userspace to think the laptop is docked to a port-replicator + * and to disable suspend-on-lid-close, which is undesirable. + * Map the dock events to KEY_IGNORE to avoid this broken SW_DOCK reporting. + */ + { KE_IGNORE, 0xCA, { .sw = { SW_DOCK, 1 } } }, /* Docked */ + { KE_IGNORE, 0xCB, { .sw = { SW_DOCK, 0 } } }, /* Undocked */ { KE_SW, 0xCC, { .sw = { SW_TABLET_MODE, 1 } } }, /* Tablet */ { KE_SW, 0xCD, { .sw = { SW_TABLET_MODE, 0 } } }, /* Laptop */ }; diff --git a/drivers/platform/x86/intel_pmt_crashlog.c b/drivers/platform/x86/intel_pmt_crashlog.c index 97dd749c8290..92d315a16cfd 100644 --- a/drivers/platform/x86/intel_pmt_crashlog.c +++ b/drivers/platform/x86/intel_pmt_crashlog.c @@ -23,18 +23,17 @@ #define CRASH_TYPE_OOBMSM 1 /* Control Flags */ -#define CRASHLOG_FLAG_DISABLE BIT(27) +#define CRASHLOG_FLAG_DISABLE BIT(28) /* - * Bits 28 and 29 control the state of bit 31. + * Bits 29 and 30 control the state of bit 31. * - * Bit 28 will clear bit 31, if set, allowing a new crashlog to be captured. - * Bit 29 will immediately trigger a crashlog to be generated, setting bit 31. - * Bit 30 is read-only and reserved as 0. + * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured. + * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31. * Bit 31 is the read-only status with a 1 indicating log is complete. */ -#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(28) -#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(29) +#define CRASHLOG_FLAG_TRIGGER_CLEAR BIT(29) +#define CRASHLOG_FLAG_TRIGGER_EXECUTE BIT(30) #define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31) #define CRASHLOG_FLAG_TRIGGER_MASK GENMASK(31, 28) diff --git a/drivers/power/reset/at91-sama5d2_shdwc.c b/drivers/power/reset/at91-sama5d2_shdwc.c index 2fe3a627cb53..d9cf91e5b06d 100644 --- a/drivers/power/reset/at91-sama5d2_shdwc.c +++ b/drivers/power/reset/at91-sama5d2_shdwc.c @@ -37,7 +37,7 @@ #define AT91_SHDW_MR 0x04 /* Shut Down Mode Register */ #define AT91_SHDW_WKUPDBC_SHIFT 24 -#define AT91_SHDW_WKUPDBC_MASK GENMASK(31, 16) +#define AT91_SHDW_WKUPDBC_MASK GENMASK(26, 24) #define AT91_SHDW_WKUPDBC(x) (((x) << AT91_SHDW_WKUPDBC_SHIFT) \ & AT91_SHDW_WKUPDBC_MASK) diff --git a/drivers/power/supply/Kconfig b/drivers/power/supply/Kconfig index eec646c568b7..1699b9269a78 100644 --- a/drivers/power/supply/Kconfig +++ b/drivers/power/supply/Kconfig @@ -229,6 +229,7 @@ config BATTERY_SBS config CHARGER_SBS tristate "SBS Compliant charger" depends on I2C + select REGMAP_I2C help Say Y to include support for SBS compliant battery chargers. diff --git a/drivers/power/supply/axp20x_usb_power.c b/drivers/power/supply/axp20x_usb_power.c index 70b28b699a80..8933ae26c3d6 100644 --- a/drivers/power/supply/axp20x_usb_power.c +++ b/drivers/power/supply/axp20x_usb_power.c @@ -593,6 +593,7 @@ static int axp20x_usb_power_probe(struct platform_device *pdev) power->axp20x_id = axp_data->axp20x_id; power->regmap = axp20x->regmap; power->num_irqs = axp_data->num_irq_names; + INIT_DELAYED_WORK(&power->vbus_detect, axp20x_usb_power_poll_vbus); if (power->axp20x_id == AXP202_ID) { /* Enable vbus valid checking */ @@ -645,7 +646,6 @@ static int axp20x_usb_power_probe(struct platform_device *pdev) } } - INIT_DELAYED_WORK(&power->vbus_detect, axp20x_usb_power_poll_vbus); if (axp20x_usb_vbus_needs_polling(power)) queue_delayed_work(system_power_efficient_wq, &power->vbus_detect, 0); diff --git a/drivers/power/supply/cpcap-battery.c b/drivers/power/supply/cpcap-battery.c index 295611b3b15e..cebc5c8fda1b 100644 --- a/drivers/power/supply/cpcap-battery.c +++ b/drivers/power/supply/cpcap-battery.c @@ -561,17 +561,21 @@ static int cpcap_battery_update_charger(struct cpcap_battery_ddata *ddata, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE, &prop); if (error) - return error; + goto out_put; /* Allow charger const voltage lower than battery const voltage */ if (const_charge_voltage > prop.intval) - return 0; + goto out_put; val.intval = const_charge_voltage; - return power_supply_set_property(charger, + error = power_supply_set_property(charger, POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE, &val); +out_put: + power_supply_put(charger); + + return error; } static int cpcap_battery_set_property(struct power_supply *psy, @@ -666,7 +670,7 @@ static int cpcap_battery_init_irq(struct platform_device *pdev, error = devm_request_threaded_irq(ddata->dev, irq, NULL, cpcap_battery_irq_thread, - IRQF_SHARED, + IRQF_SHARED | IRQF_ONESHOT, name, ddata); if (error) { dev_err(ddata->dev, "could not get irq %s: %i\n", diff --git a/drivers/power/supply/cpcap-charger.c b/drivers/power/supply/cpcap-charger.c index c0d452e3dc8b..22fff01425d6 100644 --- a/drivers/power/supply/cpcap-charger.c +++ b/drivers/power/supply/cpcap-charger.c @@ -301,6 +301,8 @@ cpcap_charger_get_bat_const_charge_voltage(struct cpcap_charger_ddata *ddata) &prop); if (!error) voltage = prop.intval; + + power_supply_put(battery); } return voltage; @@ -708,7 +710,7 @@ static int cpcap_usb_init_irq(struct platform_device *pdev, error = devm_request_threaded_irq(ddata->dev, irq, NULL, cpcap_charger_irq_thread, - IRQF_SHARED, + IRQF_SHARED | IRQF_ONESHOT, name, ddata); if (error) { dev_err(ddata->dev, "could not get irq %s: %i\n", diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c index d3bf35ed12ce..8cfbd8d6b478 100644 --- a/drivers/power/supply/smb347-charger.c +++ b/drivers/power/supply/smb347-charger.c @@ -137,6 +137,7 @@ * @mains_online: is AC/DC input connected * @usb_online: is USB input connected * @charging_enabled: is charging enabled + * @irq_unsupported: is interrupt unsupported by SMB hardware * @max_charge_current: maximum current (in uA) the battery can be charged * @max_charge_voltage: maximum voltage (in uV) the battery can be charged * @pre_charge_current: current (in uA) to use in pre-charging phase @@ -193,6 +194,7 @@ struct smb347_charger { bool mains_online; bool usb_online; bool charging_enabled; + bool irq_unsupported; unsigned int max_charge_current; unsigned int max_charge_voltage; @@ -862,6 +864,9 @@ static int smb347_irq_set(struct smb347_charger *smb, bool enable) { int ret; + if (smb->irq_unsupported) + return 0; + ret = smb347_set_writable(smb, true); if (ret < 0) return ret; @@ -923,8 +928,6 @@ static int smb347_irq_init(struct smb347_charger *smb, ret = regmap_update_bits(smb->regmap, CFG_STAT, CFG_STAT_ACTIVE_HIGH | CFG_STAT_DISABLED, CFG_STAT_DISABLED); - if (ret < 0) - client->irq = 0; smb347_set_writable(smb, false); @@ -1345,6 +1348,7 @@ static int smb347_probe(struct i2c_client *client, if (ret < 0) { dev_warn(dev, "failed to initialize IRQ: %d\n", ret); dev_warn(dev, "disabling IRQ support\n"); + smb->irq_unsupported = true; } else { smb347_irq_enable(smb); } @@ -1357,8 +1361,8 @@ static int smb347_remove(struct i2c_client *client) { struct smb347_charger *smb = i2c_get_clientdata(client); - if (client->irq) - smb347_irq_disable(smb); + smb347_irq_disable(smb); + return 0; } diff --git a/drivers/pwm/pwm-iqs620a.c b/drivers/pwm/pwm-iqs620a.c index 5ede8255926e..14b18fb4f527 100644 --- a/drivers/pwm/pwm-iqs620a.c +++ b/drivers/pwm/pwm-iqs620a.c @@ -46,7 +46,8 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, { struct iqs620_pwm_private *iqs620_pwm; struct iqs62x_core *iqs62x; - u64 duty_scale; + unsigned int duty_cycle; + unsigned int duty_scale; int ret; if (state->polarity != PWM_POLARITY_NORMAL) @@ -70,7 +71,8 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, * For lower duty cycles (e.g. 0), the PWM output is simply disabled to * allow an external pull-down resistor to hold the GPIO3/LTX pin low. */ - duty_scale = div_u64(state->duty_cycle * 256, IQS620_PWM_PERIOD_NS); + duty_cycle = min_t(u64, state->duty_cycle, IQS620_PWM_PERIOD_NS); + duty_scale = duty_cycle * 256 / IQS620_PWM_PERIOD_NS; mutex_lock(&iqs620_pwm->lock); @@ -82,7 +84,7 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, } if (duty_scale) { - u8 duty_val = min_t(u64, duty_scale - 1, 0xff); + u8 duty_val = duty_scale - 1; ret = regmap_write(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE, duty_val); diff --git a/drivers/pwm/pwm-rockchip.c b/drivers/pwm/pwm-rockchip.c index 389a5e140412..f3a5641f6bca 100644 --- a/drivers/pwm/pwm-rockchip.c +++ b/drivers/pwm/pwm-rockchip.c @@ -288,6 +288,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev) const struct of_device_id *id; struct rockchip_pwm_chip *pc; u32 enable_conf, ctrl; + bool enabled; int ret, count; id = of_match_device(rockchip_pwm_dt_ids, &pdev->dev); @@ -330,9 +331,9 @@ static int rockchip_pwm_probe(struct platform_device *pdev) return ret; } - ret = clk_prepare(pc->pclk); + ret = clk_prepare_enable(pc->pclk); if (ret) { - dev_err(&pdev->dev, "Can't prepare APB clk: %d\n", ret); + dev_err(&pdev->dev, "Can't prepare enable APB clk: %d\n", ret); goto err_clk; } @@ -349,23 +350,26 @@ static int rockchip_pwm_probe(struct platform_device *pdev) pc->chip.of_pwm_n_cells = 3; } + enable_conf = pc->data->enable_conf; + ctrl = readl_relaxed(pc->base + pc->data->regs.ctrl); + enabled = (ctrl & enable_conf) == enable_conf; + ret = pwmchip_add(&pc->chip); if (ret < 0) { - clk_unprepare(pc->clk); dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret); goto err_pclk; } /* Keep the PWM clk enabled if the PWM appears to be up and running. */ - enable_conf = pc->data->enable_conf; - ctrl = readl_relaxed(pc->base + pc->data->regs.ctrl); - if ((ctrl & enable_conf) != enable_conf) + if (!enabled) clk_disable(pc->clk); + clk_disable(pc->pclk); + return 0; err_pclk: - clk_unprepare(pc->pclk); + clk_disable_unprepare(pc->pclk); err_clk: clk_disable_unprepare(pc->clk); diff --git a/drivers/regulator/axp20x-regulator.c b/drivers/regulator/axp20x-regulator.c index 90cb8445f721..d260c442b788 100644 --- a/drivers/regulator/axp20x-regulator.c +++ b/drivers/regulator/axp20x-regulator.c @@ -1070,7 +1070,7 @@ static int axp20x_set_dcdc_freq(struct platform_device *pdev, u32 dcdcfreq) static int axp20x_regulator_parse_dt(struct platform_device *pdev) { struct device_node *np, *regulators; - int ret; + int ret = 0; u32 dcdcfreq = 0; np = of_node_get(pdev->dev.parent->of_node); @@ -1085,13 +1085,12 @@ static int axp20x_regulator_parse_dt(struct platform_device *pdev) ret = axp20x_set_dcdc_freq(pdev, dcdcfreq); if (ret < 0) { dev_err(&pdev->dev, "Error setting dcdc frequency: %d\n", ret); - return ret; } - of_node_put(regulators); } - return 0; + of_node_put(np); + return ret; } static int axp20x_set_dcdc_workmode(struct regulator_dev *rdev, int id, u32 workmode) diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index 67a768fe5b2a..2e6c6af9d1c3 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1617,7 +1617,7 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, const char *supply_name) { struct regulator *regulator; - int err; + int err = 0; if (dev) { char buf[REG_STR_SIZE]; @@ -1663,8 +1663,8 @@ static struct regulator *create_regulator(struct regulator_dev *rdev, } } - regulator->debugfs = debugfs_create_dir(supply_name, - rdev->debugfs); + if (err != -EEXIST) + regulator->debugfs = debugfs_create_dir(supply_name, rdev->debugfs); if (!regulator->debugfs) { rdev_dbg(rdev, "Failed to create debugfs directory\n"); } else { diff --git a/drivers/regulator/pca9450-regulator.c b/drivers/regulator/pca9450-regulator.c index cb29421d745a..d38109cc3a01 100644 --- a/drivers/regulator/pca9450-regulator.c +++ b/drivers/regulator/pca9450-regulator.c @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -32,6 +33,7 @@ struct pca9450_regulator_desc { struct pca9450 { struct device *dev; struct regmap *regmap; + struct gpio_desc *sd_vsel_gpio; enum pca9450_chip_type type; unsigned int rcnt; int irq; @@ -795,6 +797,34 @@ static int pca9450_i2c_probe(struct i2c_client *i2c, return ret; } + /* Clear PRESET_EN bit in BUCK123_DVS to use DVS registers */ + ret = regmap_clear_bits(pca9450->regmap, PCA9450_REG_BUCK123_DVS, + BUCK123_PRESET_EN); + if (ret) { + dev_err(&i2c->dev, "Failed to clear PRESET_EN bit: %d\n", ret); + return ret; + } + + /* Set reset behavior on assertion of WDOG_B signal */ + ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_RESET_CTRL, + WDOG_B_CFG_MASK, WDOG_B_CFG_COLD_LDO12); + if (ret) { + dev_err(&i2c->dev, "Failed to set WDOG_B reset behavior\n"); + return ret; + } + + /* + * The driver uses the LDO5CTRL_H register to control the LDO5 regulator. + * This is only valid if the SD_VSEL input of the PMIC is high. Let's + * check if the pin is available as GPIO and set it to high. + */ + pca9450->sd_vsel_gpio = gpiod_get_optional(pca9450->dev, "sd-vsel", GPIOD_OUT_HIGH); + + if (IS_ERR(pca9450->sd_vsel_gpio)) { + dev_err(&i2c->dev, "Failed to get SD_VSEL GPIO\n"); + return ret; + } + dev_info(&i2c->dev, "%s probed.\n", type == PCA9450_TYPE_PCA9450A ? "pca9450a" : "pca9450bc"); diff --git a/drivers/regulator/qcom-rpmh-regulator.c b/drivers/regulator/qcom-rpmh-regulator.c index c395a8dda6f7..0fd3da36f62e 100644 --- a/drivers/regulator/qcom-rpmh-regulator.c +++ b/drivers/regulator/qcom-rpmh-regulator.c @@ -726,7 +726,16 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = { static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = { .regulator_type = VRM, .ops = &rpmh_regulator_vrm_ops, - .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000), + .voltage_range = REGULATOR_LINEAR_RANGE(320000, 0, 235, 16000), + .n_voltages = 236, + .pmic_mode_map = pmic_mode_map_pmic5_smps, + .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode, +}; + +static const struct rpmh_vreg_hw_data pmic5_hfsmps515_1 = { + .regulator_type = VRM, + .ops = &rpmh_regulator_vrm_ops, + .voltage_range = REGULATOR_LINEAR_RANGE(900000, 0, 4, 16000), .n_voltages = 5, .pmic_mode_map = pmic_mode_map_pmic5_smps, .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode, @@ -892,7 +901,7 @@ static const struct rpmh_vreg_init_data pm8350_vreg_data[] = { }; static const struct rpmh_vreg_init_data pm8350c_vreg_data[] = { - RPMH_VREG("smps1", "smp%s1", &pmic5_hfsmps510, "vdd-s1"), + RPMH_VREG("smps1", "smp%s1", &pmic5_hfsmps515, "vdd-s1"), RPMH_VREG("smps2", "smp%s2", &pmic5_ftsmps510, "vdd-s2"), RPMH_VREG("smps3", "smp%s3", &pmic5_ftsmps510, "vdd-s3"), RPMH_VREG("smps4", "smp%s4", &pmic5_ftsmps510, "vdd-s4"), @@ -928,6 +937,19 @@ static const struct rpmh_vreg_init_data pm8009_vreg_data[] = { RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l4"), RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l6"), RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l5-l6"), + RPMH_VREG("ldo7", "ldo%s7", &pmic5_pldo_lv, "vdd-l7"), + {}, +}; + +static const struct rpmh_vreg_init_data pm8009_1_vreg_data[] = { + RPMH_VREG("smps1", "smp%s1", &pmic5_hfsmps510, "vdd-s1"), + RPMH_VREG("smps2", "smp%s2", &pmic5_hfsmps515_1, "vdd-s2"), + RPMH_VREG("ldo1", "ldo%s1", &pmic5_nldo, "vdd-l1"), + RPMH_VREG("ldo2", "ldo%s2", &pmic5_nldo, "vdd-l2"), + RPMH_VREG("ldo3", "ldo%s3", &pmic5_nldo, "vdd-l3"), + RPMH_VREG("ldo4", "ldo%s4", &pmic5_nldo, "vdd-l4"), + RPMH_VREG("ldo5", "ldo%s5", &pmic5_pldo, "vdd-l5-l6"), + RPMH_VREG("ldo6", "ldo%s6", &pmic5_pldo, "vdd-l5-l6"), RPMH_VREG("ldo7", "ldo%s6", &pmic5_pldo_lv, "vdd-l7"), {}, }; @@ -1057,6 +1079,10 @@ static const struct of_device_id __maybe_unused rpmh_regulator_match_table[] = { .compatible = "qcom,pm8009-rpmh-regulators", .data = pm8009_vreg_data, }, + { + .compatible = "qcom,pm8009-1-rpmh-regulators", + .data = pm8009_1_vreg_data, + }, { .compatible = "qcom,pm8150-rpmh-regulators", .data = pm8150_vreg_data, diff --git a/drivers/regulator/rohm-regulator.c b/drivers/regulator/rohm-regulator.c index 399002383b28..5c558b153d55 100644 --- a/drivers/regulator/rohm-regulator.c +++ b/drivers/regulator/rohm-regulator.c @@ -52,9 +52,12 @@ int rohm_regulator_set_dvs_levels(const struct rohm_dvs_config *dvs, char *prop; unsigned int reg, mask, omask, oreg = desc->enable_reg; - for (i = 0; i < ROHM_DVS_LEVEL_MAX && !ret; i++) { - if (dvs->level_map & (1 << i)) { - switch (i + 1) { + for (i = 0; i < ROHM_DVS_LEVEL_VALID_AMOUNT && !ret; i++) { + int bit; + + bit = BIT(i); + if (dvs->level_map & bit) { + switch (bit) { case ROHM_DVS_LEVEL_RUN: prop = "rohm,dvs-run-voltage"; reg = dvs->run_reg; diff --git a/drivers/regulator/s5m8767.c b/drivers/regulator/s5m8767.c index 3fa472127e9a..7c111bbdc2af 100644 --- a/drivers/regulator/s5m8767.c +++ b/drivers/regulator/s5m8767.c @@ -544,14 +544,18 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev, rdata = devm_kcalloc(&pdev->dev, pdata->num_regulators, sizeof(*rdata), GFP_KERNEL); - if (!rdata) + if (!rdata) { + of_node_put(regulators_np); return -ENOMEM; + } rmode = devm_kcalloc(&pdev->dev, pdata->num_regulators, sizeof(*rmode), GFP_KERNEL); - if (!rmode) + if (!rmode) { + of_node_put(regulators_np); return -ENOMEM; + } pdata->regulators = rdata; pdata->opmode = rmode; @@ -573,10 +577,13 @@ static int s5m8767_pmic_dt_parse_pdata(struct platform_device *pdev, "s5m8767,pmic-ext-control", GPIOD_OUT_HIGH | GPIOD_FLAGS_BIT_NONEXCLUSIVE, "s5m8767"); - if (PTR_ERR(rdata->ext_control_gpiod) == -ENOENT) + if (PTR_ERR(rdata->ext_control_gpiod) == -ENOENT) { rdata->ext_control_gpiod = NULL; - else if (IS_ERR(rdata->ext_control_gpiod)) + } else if (IS_ERR(rdata->ext_control_gpiod)) { + of_node_put(reg_np); + of_node_put(regulators_np); return PTR_ERR(rdata->ext_control_gpiod); + } rdata->id = i; rdata->initdata = of_get_regulator_init_data( diff --git a/drivers/remoteproc/mtk_common.h b/drivers/remoteproc/mtk_common.h index 988edb4977c3..bcab38511bf3 100644 --- a/drivers/remoteproc/mtk_common.h +++ b/drivers/remoteproc/mtk_common.h @@ -47,6 +47,7 @@ #define MT8192_CORE0_SW_RSTN_CLR 0x10000 #define MT8192_CORE0_SW_RSTN_SET 0x10004 +#define MT8192_CORE0_WDT_IRQ 0x10030 #define MT8192_CORE0_WDT_CFG 0x10034 #define SCP_FW_VER_LEN 32 diff --git a/drivers/remoteproc/mtk_scp.c b/drivers/remoteproc/mtk_scp.c index e0c235690361..eba825b46696 100644 --- a/drivers/remoteproc/mtk_scp.c +++ b/drivers/remoteproc/mtk_scp.c @@ -197,17 +197,19 @@ static void mt8192_scp_irq_handler(struct mtk_scp *scp) scp_to_host = readl(scp->reg_base + MT8192_SCP2APMCU_IPC_SET); - if (scp_to_host & MT8192_SCP_IPC_INT_BIT) + if (scp_to_host & MT8192_SCP_IPC_INT_BIT) { scp_ipi_handler(scp); - else - scp_wdt_handler(scp, scp_to_host); - /* - * SCP won't send another interrupt until we clear - * MT8192_SCP2APMCU_IPC. - */ - writel(MT8192_SCP_IPC_INT_BIT, - scp->reg_base + MT8192_SCP2APMCU_IPC_CLR); + /* + * SCP won't send another interrupt until we clear + * MT8192_SCP2APMCU_IPC. + */ + writel(MT8192_SCP_IPC_INT_BIT, + scp->reg_base + MT8192_SCP2APMCU_IPC_CLR); + } else { + scp_wdt_handler(scp, scp_to_host); + writel(1, scp->reg_base + MT8192_CORE0_WDT_IRQ); + } } static irqreturn_t scp_irq_handler(int irq, void *priv) diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6123f9f4fbc9..4e2b3a175607 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -692,6 +692,7 @@ config RTC_DRV_S5M tristate "Samsung S2M/S5M series" depends on MFD_SEC_CORE || COMPILE_TEST select REGMAP_IRQ + select REGMAP_I2C help If you say yes here you will get support for the RTC of Samsung S2MPS14 and S5M PMIC series. @@ -1300,7 +1301,7 @@ config RTC_DRV_OPAL config RTC_DRV_ZYNQMP tristate "Xilinx Zynq Ultrascale+ MPSoC RTC" - depends on OF + depends on OF && HAS_IOMEM help If you say yes here you get support for the RTC controller found on Xilinx Zynq Ultrascale+ MPSoC. diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c index a7b671a21022..79161d4c6ce4 100644 --- a/drivers/rtc/rtc-rx6110.c +++ b/drivers/rtc/rtc-rx6110.c @@ -331,7 +331,7 @@ static int rx6110_probe(struct rx6110_data *rx6110, struct device *dev) return 0; } -#ifdef CONFIG_SPI_MASTER +#if IS_ENABLED(CONFIG_SPI_MASTER) static struct regmap_config regmap_spi_config = { .reg_bits = 8, .val_bits = 8, @@ -411,7 +411,7 @@ static void rx6110_spi_unregister(void) } #endif /* CONFIG_SPI_MASTER */ -#ifdef CONFIG_I2C +#if IS_ENABLED(CONFIG_I2C) static struct regmap_config regmap_i2c_config = { .reg_bits = 8, .val_bits = 8, diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index c7eb9a10c680..3101eab0addd 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -3068,7 +3068,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx, basedev = block->base; spin_lock_irq(&dq->lock); - if (basedev->state < DASD_STATE_READY) { + if (basedev->state < DASD_STATE_READY || + test_bit(DASD_FLAG_OFFLINE, &basedev->flags)) { DBF_DEV_EVENT(DBF_ERR, basedev, "device not ready for request %p", req); rc = BLK_STS_IOERR; @@ -3503,8 +3504,6 @@ void dasd_generic_remove(struct ccw_device *cdev) struct dasd_device *device; struct dasd_block *block; - cdev->handler = NULL; - device = dasd_device_from_cdev(cdev); if (IS_ERR(device)) { dasd_remove_sysfs_files(cdev); @@ -3523,6 +3522,7 @@ void dasd_generic_remove(struct ccw_device *cdev) * no quite down yet. */ dasd_set_target_state(device, DASD_STATE_NEW); + cdev->handler = NULL; /* dasd_delete_device destroys the device reference. */ block = device->block; dasd_delete_device(device); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 68106be4ba7a..767ac41686fe 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -543,7 +543,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_GET_REGION_INFO: { @@ -561,7 +561,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (ret) return ret; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_GET_IRQ_INFO: { @@ -582,7 +582,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev, if (info.count == -1) return -EINVAL; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } case VFIO_DEVICE_SET_IRQS: { diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 41fc2e4135fe..1ffdd411201c 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -1286,7 +1286,7 @@ static int vfio_ap_mdev_get_device_info(unsigned long arg) info.num_regions = 0; info.num_irqs = 0; - return copy_to_user((void __user *)arg, &info, minsz); + return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; } static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev, diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index 10206e4498d0..52eaf51c9bb6 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -1438,6 +1438,8 @@ static int icarsamodexpo_ioctl(struct ap_perms *perms, unsigned long arg) if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSAMODEXPO rc=%d\n", rc); return rc; @@ -1481,6 +1483,8 @@ static int icarsacrt_ioctl(struct ap_perms *perms, unsigned long arg) if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) { ZCRYPT_DBF(DBF_DEBUG, "ioctl ICARSACRT rc=%d\n", rc); return rc; @@ -1524,6 +1528,8 @@ static int zsecsendcprb_ioctl(struct ap_perms *perms, unsigned long arg) if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDCPRB rc=%d status=0x%x\n", rc, xcRB.status); @@ -1568,6 +1574,8 @@ static int zsendep11cprb_ioctl(struct ap_perms *perms, unsigned long arg) if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) ZCRYPT_DBF(DBF_DEBUG, "ioctl ZSENDEP11CPRB rc=%d\n", rc); if (copy_to_user(uxcrb, &xcrb, sizeof(xcrb))) @@ -1744,6 +1752,8 @@ static long trans_modexpo32(struct ap_perms *perms, struct file *filp, if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) return rc; return put_user(mex64.outputdatalength, @@ -1795,6 +1805,8 @@ static long trans_modexpo_crt32(struct ap_perms *perms, struct file *filp, if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; if (rc) return rc; return put_user(crt64.outputdatalength, @@ -1865,6 +1877,8 @@ static long trans_xcRB32(struct ap_perms *perms, struct file *filp, if (rc == -EAGAIN) tr.again_counter++; } while (rc == -EAGAIN && tr.again_counter < TRACK_AGAIN_MAX); + if (rc == -EAGAIN && tr.again_counter >= TRACK_AGAIN_MAX) + rc = -EIO; xcRB32.reply_control_blk_length = xcRB64.reply_control_blk_length; xcRB32.reply_data_length = xcRB64.reply_data_length; xcRB32.status = xcRB64.status; diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index 28f637042d44..7aed775ee874 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -436,7 +436,7 @@ struct qeth_qdio_out_buffer { int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER]; struct qeth_qdio_out_q *q; - struct qeth_qdio_out_buffer *next_pending; + struct list_head list_entry; }; struct qeth_card; @@ -500,6 +500,7 @@ struct qeth_qdio_out_q { struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q]; struct qdio_outbuf_state *bufstates; /* convenience pointer */ + struct list_head pending_bufs; struct qeth_out_q_stats stats; spinlock_t lock; unsigned int priority; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index cf18d87da41e..5e4dcc9aae1b 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -73,8 +73,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card); static void qeth_notify_skbs(struct qeth_qdio_out_q *queue, struct qeth_qdio_out_buffer *buf, enum iucv_tx_notify notification); -static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, - int budget); static void qeth_close_dev_handler(struct work_struct *work) { @@ -465,41 +463,6 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15, return n; } -static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, - int forced_cleanup) -{ - if (q->card->options.cq != QETH_CQ_ENABLED) - return; - - if (q->bufs[bidx]->next_pending != NULL) { - struct qeth_qdio_out_buffer *head = q->bufs[bidx]; - struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending; - - while (c) { - if (forced_cleanup || - atomic_read(&c->state) == QETH_QDIO_BUF_EMPTY) { - struct qeth_qdio_out_buffer *f = c; - - QETH_CARD_TEXT(f->q->card, 5, "fp"); - QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f); - /* release here to avoid interleaving between - outbound tasklet and inbound tasklet - regarding notifications and lifecycle */ - qeth_tx_complete_buf(c, forced_cleanup, 0); - - c = f->next_pending; - WARN_ON_ONCE(head->next_pending != f); - head->next_pending = c; - kmem_cache_free(qeth_qdio_outbuf_cache, f); - } else { - head = c; - c = c->next_pending; - } - - } - } -} - static void qeth_qdio_handle_aob(struct qeth_card *card, unsigned long phys_aob_addr) { @@ -507,6 +470,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, struct qaob *aob; struct qeth_qdio_out_buffer *buffer; enum iucv_tx_notify notification; + struct qeth_qdio_out_q *queue; unsigned int i; aob = (struct qaob *) phys_to_virt(phys_aob_addr); @@ -537,7 +501,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, qeth_notify_skbs(buffer->q, buffer, notification); /* Free dangling allocations. The attached skbs are handled by - * qeth_cleanup_handled_pending(). + * qeth_tx_complete_pending_bufs(). */ for (i = 0; i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card); @@ -549,7 +513,9 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, buffer->is_header[i] = 0; } + queue = buffer->q; atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY); + napi_schedule(&queue->napi); break; default: WARN_ON_ONCE(1); @@ -1420,9 +1386,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, struct qeth_qdio_out_q *queue = buf->q; struct sk_buff *skb; - if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) - qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); - /* Empty buffer? */ if (buf->next_element_to_fill == 0) return; @@ -1484,14 +1447,38 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY); } +static void qeth_tx_complete_pending_bufs(struct qeth_card *card, + struct qeth_qdio_out_q *queue, + bool drain) +{ + struct qeth_qdio_out_buffer *buf, *tmp; + + list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) { + if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) { + QETH_CARD_TEXT(card, 5, "fp"); + QETH_CARD_TEXT_(card, 5, "%lx", (long) buf); + + if (drain) + qeth_notify_skbs(queue, buf, + TX_NOTIFY_GENERALERROR); + qeth_tx_complete_buf(buf, drain, 0); + + list_del(&buf->list_entry); + kmem_cache_free(qeth_qdio_outbuf_cache, buf); + } + } +} + static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free) { int j; + qeth_tx_complete_pending_bufs(q->card, q, true); + for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { if (!q->bufs[j]) continue; - qeth_cleanup_handled_pending(q, j, 1); + qeth_clear_output_buffer(q, q->bufs[j], true, 0); if (free) { kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]); @@ -2611,7 +2598,6 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx) skb_queue_head_init(&newbuf->skb_list); lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key); newbuf->q = q; - newbuf->next_pending = q->bufs[bidx]; atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY); q->bufs[bidx] = newbuf; return 0; @@ -2630,15 +2616,28 @@ static void qeth_free_output_queue(struct qeth_qdio_out_q *q) static struct qeth_qdio_out_q *qeth_alloc_output_queue(void) { struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL); + unsigned int i; if (!q) return NULL; - if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) { - kfree(q); - return NULL; + if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) + goto err_qdio_bufs; + + for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) { + if (qeth_init_qdio_out_buf(q, i)) + goto err_out_bufs; } + return q; + +err_out_bufs: + while (i > 0) + kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]); + qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q); +err_qdio_bufs: + kfree(q); + return NULL; } static void qeth_tx_completion_timer(struct timer_list *timer) @@ -2651,7 +2650,7 @@ static void qeth_tx_completion_timer(struct timer_list *timer) static int qeth_alloc_qdio_queues(struct qeth_card *card) { - int i, j; + unsigned int i; QETH_CARD_TEXT(card, 2, "allcqdbf"); @@ -2680,18 +2679,12 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) card->qdio.out_qs[i] = queue; queue->card = card; queue->queue_no = i; + INIT_LIST_HEAD(&queue->pending_bufs); spin_lock_init(&queue->lock); timer_setup(&queue->timer, qeth_tx_completion_timer, 0); queue->coalesce_usecs = QETH_TX_COALESCE_USECS; queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES; queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT; - - /* give outbound qeth_qdio_buffers their qdio_buffers */ - for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) { - WARN_ON(queue->bufs[j]); - if (qeth_init_qdio_out_buf(queue, j)) - goto out_freeoutqbufs; - } } /* completion */ @@ -2700,13 +2693,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card) return 0; -out_freeoutqbufs: - while (j > 0) { - --j; - kmem_cache_free(qeth_qdio_outbuf_cache, - card->qdio.out_qs[i]->bufs[j]); - card->qdio.out_qs[i]->bufs[j] = NULL; - } out_freeoutq: while (i > 0) { qeth_free_output_queue(card->qdio.out_qs[--i]); @@ -6100,6 +6086,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, qeth_schedule_recovery(card); } + list_add(&buffer->list_entry, + &queue->pending_bufs); /* Skip clearing the buffer: */ return; case QETH_QDIO_BUF_QAOB_OK: @@ -6155,6 +6143,8 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget) unsigned int bytes = 0; int completed; + qeth_tx_complete_pending_bufs(card, queue, false); + if (qeth_out_queue_is_empty(queue)) { napi_complete(napi); return 0; @@ -6187,7 +6177,6 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget) qeth_handle_send_error(card, buffer, error); qeth_iqd_tx_complete(queue, bidx, error, budget); - qeth_cleanup_handled_pending(queue, bidx, false); } netdev_tx_completed_queue(txq, packets, bytes); @@ -7239,9 +7228,7 @@ int qeth_open(struct net_device *dev) card->data.state = CH_STATE_UP; netif_tx_start_all_queues(dev); - napi_enable(&card->napi); local_bh_disable(); - napi_schedule(&card->napi); if (IS_IQD(card)) { struct qeth_qdio_out_q *queue; unsigned int i; @@ -7253,8 +7240,12 @@ int qeth_open(struct net_device *dev) napi_schedule(&queue->napi); } } + + napi_enable(&card->napi); + napi_schedule(&card->napi); /* kick-start the NAPI softirq: */ local_bh_enable(); + return 0; } EXPORT_SYMBOL_GPL(qeth_open); @@ -7264,6 +7255,11 @@ int qeth_stop(struct net_device *dev) struct qeth_card *card = dev->ml_priv; QETH_CARD_TEXT(card, 4, "qethstop"); + + napi_disable(&card->napi); + cancel_delayed_work_sync(&card->buffer_reclaim_work); + qdio_stop_irq(CARD_DDEV(card)); + if (IS_IQD(card)) { struct qeth_qdio_out_q *queue; unsigned int i; @@ -7284,10 +7280,6 @@ int qeth_stop(struct net_device *dev) netif_tx_disable(dev); } - napi_disable(&card->napi); - cancel_delayed_work_sync(&card->buffer_reclaim_work); - qdio_stop_irq(CARD_DDEV(card)); - return 0; } EXPORT_SYMBOL_GPL(qeth_stop); diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index 5730572b52cd..54e686dca6de 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -117,7 +117,7 @@ struct virtio_rev_info { }; /* the highest virtio-ccw revision we support */ -#define VIRTIO_CCW_REV_MAX 1 +#define VIRTIO_CCW_REV_MAX 2 struct virtio_ccw_vq_info { struct virtqueue *vq; @@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev) u8 old_status = vcdev->dma_area->status; struct ccw1 *ccw; - if (vcdev->revision < 1) + if (vcdev->revision < 2) return vcdev->dma_area->status; ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw)); diff --git a/drivers/scsi/aic94xx/aic94xx_scb.c b/drivers/scsi/aic94xx/aic94xx_scb.c index 13677973da5c..770546177ca4 100644 --- a/drivers/scsi/aic94xx/aic94xx_scb.c +++ b/drivers/scsi/aic94xx/aic94xx_scb.c @@ -68,7 +68,6 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb, struct done_list_struct *dl) { struct asd_ha_struct *asd_ha = ascb->ha; - struct sas_ha_struct *sas_ha = &asd_ha->sas_ha; int phy_id = dl->status_block[0] & DL_PHY_MASK; struct asd_phy *phy = &asd_ha->phys[phy_id]; @@ -81,7 +80,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb, ASD_DPRINTK("phy%d: device unplugged\n", phy_id); asd_turn_led(asd_ha, phy_id, 0); sas_phy_disconnected(&phy->sas_phy); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); break; case CURRENT_OOB_DONE: /* hot plugged device */ @@ -89,12 +88,12 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb, get_lrate_mode(phy, oob_mode); ASD_DPRINTK("phy%d device plugged: lrate:0x%x, proto:0x%x\n", phy_id, phy->sas_phy.linkrate, phy->sas_phy.iproto); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); break; case CURRENT_SPINUP_HOLD: /* hot plug SATA, no COMWAKE sent */ asd_turn_led(asd_ha, phy_id, 1); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); + sas_notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); break; case CURRENT_GTO_TIMEOUT: case CURRENT_OOB_ERROR: @@ -102,7 +101,7 @@ static void asd_phy_event_tasklet(struct asd_ascb *ascb, dl->status_block[1]); asd_turn_led(asd_ha, phy_id, 0); sas_phy_disconnected(&phy->sas_phy); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); break; } } @@ -222,7 +221,6 @@ static void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb, int edb_el = edb_id + ascb->edb_index; struct asd_dma_tok *edb = ascb->ha->seq.edb_arr[edb_el]; struct asd_phy *phy = &ascb->ha->phys[phy_id]; - struct sas_ha_struct *sas_ha = phy->sas_phy.ha; u16 size = ((dl->status_block[3] & 7) << 8) | dl->status_block[2]; size = min(size, (u16) sizeof(phy->frame_rcvd)); @@ -234,7 +232,7 @@ static void asd_bytes_dmaed_tasklet(struct asd_ascb *ascb, spin_unlock_irqrestore(&phy->sas_phy.frame_rcvd_lock, flags); asd_dump_frame_rcvd(phy, dl); asd_form_port(ascb->ha, phy); - sas_ha->notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED); + sas_notify_port_event(&phy->sas_phy, PORTE_BYTES_DMAED); } static void asd_link_reset_err_tasklet(struct asd_ascb *ascb, @@ -270,7 +268,7 @@ static void asd_link_reset_err_tasklet(struct asd_ascb *ascb, asd_turn_led(asd_ha, phy_id, 0); sas_phy_disconnected(sas_phy); asd_deform_port(asd_ha, phy); - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); if (retries_left == 0) { int num = 1; @@ -315,7 +313,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb, spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = ffs(cont); spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy,PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case LmUNKNOWNP: @@ -336,7 +334,7 @@ static void asd_primitive_rcvd_tasklet(struct asd_ascb *ascb, /* The sequencer disables all phys on that port. * We have to re-enable the phys ourselves. */ asd_deform_port(asd_ha, phy); - sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET); + sas_notify_port_event(sas_phy, PORTE_HARD_RESET); break; default: @@ -567,7 +565,7 @@ static void escb_tasklet_complete(struct asd_ascb *ascb, /* the device is gone */ sas_phy_disconnected(sas_phy); asd_deform_port(asd_ha, phy); - sas_ha->notify_port_event(sas_phy, PORTE_TIMER_EVENT); + sas_notify_port_event(sas_phy, PORTE_TIMER_EVENT); break; default: ASD_DPRINTK("%s: phy%d: unknown event:0x%x\n", __func__, diff --git a/drivers/scsi/bnx2fc/Kconfig b/drivers/scsi/bnx2fc/Kconfig index 3cf7e08df809..ecdc0f0f4f4e 100644 --- a/drivers/scsi/bnx2fc/Kconfig +++ b/drivers/scsi/bnx2fc/Kconfig @@ -5,6 +5,7 @@ config SCSI_BNX2X_FCOE depends on (IPV6 || IPV6=n) depends on LIBFC depends on LIBFCOE + depends on MMU select NETDEVICES select ETHERNET select NET_VENDOR_BROADCOM diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index cf0bfac920a8..76f8fc3fad59 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -616,7 +616,6 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sas_ha; if (!phy->phy_attached) return; @@ -627,8 +626,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no) return; } - sas_ha = &hisi_hba->sha; - sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(sas_phy, PHYE_OOB_DONE); if (sas_phy->phy) { struct sas_phy *sphy = sas_phy->phy; @@ -656,7 +654,7 @@ static void hisi_sas_bytes_dmaed(struct hisi_hba *hisi_hba, int phy_no) } sas_phy->frame_rcvd_size = phy->frame_rcvd_size; - sas_ha->notify_port_event(sas_phy, PORTE_BYTES_DMAED); + sas_notify_port_event(sas_phy, PORTE_BYTES_DMAED); } static struct hisi_sas_device *hisi_sas_alloc_dev(struct domain_device *device) @@ -1411,7 +1409,6 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba) static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) { - struct sas_ha_struct *sas_ha = &hisi_hba->sha; struct asd_sas_port *_sas_port = NULL; int phy_no; @@ -1432,7 +1429,7 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state) _sas_port = sas_port; if (dev_is_expander(dev->dev_type)) - sas_ha->notify_port_event(sas_phy, + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); } } else { @@ -2194,7 +2191,6 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sas_ha = &hisi_hba->sha; struct device *dev = hisi_hba->dev; if (rdy) { @@ -2210,7 +2206,7 @@ void hisi_sas_phy_down(struct hisi_hba *hisi_hba, int phy_no, int rdy) return; } /* Phy down and not ready */ - sas_ha->notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL); + sas_notify_phy_event(sas_phy, PHYE_LOSS_OF_SIGNAL); sas_phy_disconnected(sas_phy); if (port) { diff --git a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c index 45e866cb9164..22eecc89d41b 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v1_hw.c @@ -1408,7 +1408,6 @@ static irqreturn_t int_bcast_v1_hw(int irq, void *p) struct hisi_sas_phy *phy = p; struct hisi_hba *hisi_hba = phy->hisi_hba; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sha = &hisi_hba->sha; struct device *dev = hisi_hba->dev; int phy_no = sas_phy->id; u32 irq_value; @@ -1424,7 +1423,7 @@ static irqreturn_t int_bcast_v1_hw(int irq, void *p) } if (!test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) - sha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); end: hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT2, diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 9adfdefef9ca..10ba0680da04 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2818,14 +2818,13 @@ static void phy_bcast_v2_hw(int phy_no, struct hisi_hba *hisi_hba) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sas_ha = &hisi_hba->sha; u32 bcast_status; hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 1); bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS); if ((bcast_status & RX_BCAST_CHG_MSK) && !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0, CHL_INT0_SL_RX_BCST_ACK_MSK); hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0); diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 7c12804b4e1d..9d9dcc11a866 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1600,14 +1600,13 @@ static irqreturn_t phy_bcast_v3_hw(int phy_no, struct hisi_hba *hisi_hba) { struct hisi_sas_phy *phy = &hisi_hba->phy[phy_no]; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sas_ha = &hisi_hba->sha; u32 bcast_status; hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 1); bcast_status = hisi_sas_phy_read32(hisi_hba, phy_no, RX_PRIMS_STATUS); if ((bcast_status & RX_BCAST_CHG_MSK) && !test_bit(HISI_SAS_RESET_BIT, &hisi_hba->flags)) - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT0, CHL_INT0_SL_RX_BCST_ACK_MSK); hisi_sas_phy_write32(hisi_hba, phy_no, SL_RX_BCAST_CHK_MSK, 0); diff --git a/drivers/scsi/isci/port.c b/drivers/scsi/isci/port.c index 1df45f028ea7..e50c3b0deeb3 100644 --- a/drivers/scsi/isci/port.c +++ b/drivers/scsi/isci/port.c @@ -164,7 +164,8 @@ static void isci_port_bc_change_received(struct isci_host *ihost, "%s: isci_phy = %p, sas_phy = %p\n", __func__, iphy, &iphy->sas_phy); - ihost->sas_ha.notify_port_event(&iphy->sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event_gfp(&iphy->sas_phy, + PORTE_BROADCAST_RCVD, GFP_ATOMIC); sci_port_bcn_enable(iport); } @@ -223,8 +224,8 @@ static void isci_port_link_up(struct isci_host *isci_host, /* Notify libsas that we have an address frame, if indeed * we've found an SSP, SMP, or STP target */ if (success) - isci_host->sas_ha.notify_port_event(&iphy->sas_phy, - PORTE_BYTES_DMAED); + sas_notify_port_event_gfp(&iphy->sas_phy, + PORTE_BYTES_DMAED, GFP_ATOMIC); } @@ -270,8 +271,8 @@ static void isci_port_link_down(struct isci_host *isci_host, * isci_port_deformed and isci_dev_gone functions. */ sas_phy_disconnected(&isci_phy->sas_phy); - isci_host->sas_ha.notify_phy_event(&isci_phy->sas_phy, - PHYE_LOSS_OF_SIGNAL); + sas_notify_phy_event_gfp(&isci_phy->sas_phy, + PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC); dev_dbg(&isci_host->pdev->dev, "%s: isci_port = %p - Done\n", __func__, isci_port); diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4e668aafbcca..af40de7e51e7 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1532,14 +1532,9 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) } rc = iscsi_prep_scsi_cmd_pdu(conn->task); if (rc) { - if (rc == -ENOMEM || rc == -EACCES) { - spin_lock_bh(&conn->taskqueuelock); - list_add_tail(&conn->task->running, - &conn->cmdqueue); - conn->task = NULL; - spin_unlock_bh(&conn->taskqueuelock); - goto done; - } else + if (rc == -ENOMEM || rc == -EACCES) + fail_scsi_task(conn->task, DID_IMM_RETRY); + else fail_scsi_task(conn->task, DID_ABORT); spin_lock_bh(&conn->taskqueuelock); continue; @@ -3338,125 +3333,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session, switch(param) { case ISCSI_PARAM_FAST_ABORT: - len = sprintf(buf, "%d\n", session->fast_abort); + len = sysfs_emit(buf, "%d\n", session->fast_abort); break; case ISCSI_PARAM_ABORT_TMO: - len = sprintf(buf, "%d\n", session->abort_timeout); + len = sysfs_emit(buf, "%d\n", session->abort_timeout); break; case ISCSI_PARAM_LU_RESET_TMO: - len = sprintf(buf, "%d\n", session->lu_reset_timeout); + len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout); break; case ISCSI_PARAM_TGT_RESET_TMO: - len = sprintf(buf, "%d\n", session->tgt_reset_timeout); + len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout); break; case ISCSI_PARAM_INITIAL_R2T_EN: - len = sprintf(buf, "%d\n", session->initial_r2t_en); + len = sysfs_emit(buf, "%d\n", session->initial_r2t_en); break; case ISCSI_PARAM_MAX_R2T: - len = sprintf(buf, "%hu\n", session->max_r2t); + len = sysfs_emit(buf, "%hu\n", session->max_r2t); break; case ISCSI_PARAM_IMM_DATA_EN: - len = sprintf(buf, "%d\n", session->imm_data_en); + len = sysfs_emit(buf, "%d\n", session->imm_data_en); break; case ISCSI_PARAM_FIRST_BURST: - len = sprintf(buf, "%u\n", session->first_burst); + len = sysfs_emit(buf, "%u\n", session->first_burst); break; case ISCSI_PARAM_MAX_BURST: - len = sprintf(buf, "%u\n", session->max_burst); + len = sysfs_emit(buf, "%u\n", session->max_burst); break; case ISCSI_PARAM_PDU_INORDER_EN: - len = sprintf(buf, "%d\n", session->pdu_inorder_en); + len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en); break; case ISCSI_PARAM_DATASEQ_INORDER_EN: - len = sprintf(buf, "%d\n", session->dataseq_inorder_en); + len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en); break; case ISCSI_PARAM_DEF_TASKMGMT_TMO: - len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo); + len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo); break; case ISCSI_PARAM_ERL: - len = sprintf(buf, "%d\n", session->erl); + len = sysfs_emit(buf, "%d\n", session->erl); break; case ISCSI_PARAM_TARGET_NAME: - len = sprintf(buf, "%s\n", session->targetname); + len = sysfs_emit(buf, "%s\n", session->targetname); break; case ISCSI_PARAM_TARGET_ALIAS: - len = sprintf(buf, "%s\n", session->targetalias); + len = sysfs_emit(buf, "%s\n", session->targetalias); break; case ISCSI_PARAM_TPGT: - len = sprintf(buf, "%d\n", session->tpgt); + len = sysfs_emit(buf, "%d\n", session->tpgt); break; case ISCSI_PARAM_USERNAME: - len = sprintf(buf, "%s\n", session->username); + len = sysfs_emit(buf, "%s\n", session->username); break; case ISCSI_PARAM_USERNAME_IN: - len = sprintf(buf, "%s\n", session->username_in); + len = sysfs_emit(buf, "%s\n", session->username_in); break; case ISCSI_PARAM_PASSWORD: - len = sprintf(buf, "%s\n", session->password); + len = sysfs_emit(buf, "%s\n", session->password); break; case ISCSI_PARAM_PASSWORD_IN: - len = sprintf(buf, "%s\n", session->password_in); + len = sysfs_emit(buf, "%s\n", session->password_in); break; case ISCSI_PARAM_IFACE_NAME: - len = sprintf(buf, "%s\n", session->ifacename); + len = sysfs_emit(buf, "%s\n", session->ifacename); break; case ISCSI_PARAM_INITIATOR_NAME: - len = sprintf(buf, "%s\n", session->initiatorname); + len = sysfs_emit(buf, "%s\n", session->initiatorname); break; case ISCSI_PARAM_BOOT_ROOT: - len = sprintf(buf, "%s\n", session->boot_root); + len = sysfs_emit(buf, "%s\n", session->boot_root); break; case ISCSI_PARAM_BOOT_NIC: - len = sprintf(buf, "%s\n", session->boot_nic); + len = sysfs_emit(buf, "%s\n", session->boot_nic); break; case ISCSI_PARAM_BOOT_TARGET: - len = sprintf(buf, "%s\n", session->boot_target); + len = sysfs_emit(buf, "%s\n", session->boot_target); break; case ISCSI_PARAM_AUTO_SND_TGT_DISABLE: - len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable); + len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable); break; case ISCSI_PARAM_DISCOVERY_SESS: - len = sprintf(buf, "%u\n", session->discovery_sess); + len = sysfs_emit(buf, "%u\n", session->discovery_sess); break; case ISCSI_PARAM_PORTAL_TYPE: - len = sprintf(buf, "%s\n", session->portal_type); + len = sysfs_emit(buf, "%s\n", session->portal_type); break; case ISCSI_PARAM_CHAP_AUTH_EN: - len = sprintf(buf, "%u\n", session->chap_auth_en); + len = sysfs_emit(buf, "%u\n", session->chap_auth_en); break; case ISCSI_PARAM_DISCOVERY_LOGOUT_EN: - len = sprintf(buf, "%u\n", session->discovery_logout_en); + len = sysfs_emit(buf, "%u\n", session->discovery_logout_en); break; case ISCSI_PARAM_BIDI_CHAP_EN: - len = sprintf(buf, "%u\n", session->bidi_chap_en); + len = sysfs_emit(buf, "%u\n", session->bidi_chap_en); break; case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL: - len = sprintf(buf, "%u\n", session->discovery_auth_optional); + len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional); break; case ISCSI_PARAM_DEF_TIME2WAIT: - len = sprintf(buf, "%d\n", session->time2wait); + len = sysfs_emit(buf, "%d\n", session->time2wait); break; case ISCSI_PARAM_DEF_TIME2RETAIN: - len = sprintf(buf, "%d\n", session->time2retain); + len = sysfs_emit(buf, "%d\n", session->time2retain); break; case ISCSI_PARAM_TSID: - len = sprintf(buf, "%u\n", session->tsid); + len = sysfs_emit(buf, "%u\n", session->tsid); break; case ISCSI_PARAM_ISID: - len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n", + len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n", session->isid[0], session->isid[1], session->isid[2], session->isid[3], session->isid[4], session->isid[5]); break; case ISCSI_PARAM_DISCOVERY_PARENT_IDX: - len = sprintf(buf, "%u\n", session->discovery_parent_idx); + len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx); break; case ISCSI_PARAM_DISCOVERY_PARENT_TYPE: if (session->discovery_parent_type) - len = sprintf(buf, "%s\n", + len = sysfs_emit(buf, "%s\n", session->discovery_parent_type); else - len = sprintf(buf, "\n"); + len = sysfs_emit(buf, "\n"); break; default: return -ENOSYS; @@ -3488,16 +3483,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr, case ISCSI_PARAM_CONN_ADDRESS: case ISCSI_HOST_PARAM_IPADDRESS: if (sin) - len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr); + len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr); else - len = sprintf(buf, "%pI6\n", &sin6->sin6_addr); + len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr); break; case ISCSI_PARAM_CONN_PORT: case ISCSI_PARAM_LOCAL_PORT: if (sin) - len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port)); + len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port)); else - len = sprintf(buf, "%hu\n", + len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin6->sin6_port)); break; default: @@ -3516,88 +3511,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn, switch(param) { case ISCSI_PARAM_PING_TMO: - len = sprintf(buf, "%u\n", conn->ping_timeout); + len = sysfs_emit(buf, "%u\n", conn->ping_timeout); break; case ISCSI_PARAM_RECV_TMO: - len = sprintf(buf, "%u\n", conn->recv_timeout); + len = sysfs_emit(buf, "%u\n", conn->recv_timeout); break; case ISCSI_PARAM_MAX_RECV_DLENGTH: - len = sprintf(buf, "%u\n", conn->max_recv_dlength); + len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength); break; case ISCSI_PARAM_MAX_XMIT_DLENGTH: - len = sprintf(buf, "%u\n", conn->max_xmit_dlength); + len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength); break; case ISCSI_PARAM_HDRDGST_EN: - len = sprintf(buf, "%d\n", conn->hdrdgst_en); + len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en); break; case ISCSI_PARAM_DATADGST_EN: - len = sprintf(buf, "%d\n", conn->datadgst_en); + len = sysfs_emit(buf, "%d\n", conn->datadgst_en); break; case ISCSI_PARAM_IFMARKER_EN: - len = sprintf(buf, "%d\n", conn->ifmarker_en); + len = sysfs_emit(buf, "%d\n", conn->ifmarker_en); break; case ISCSI_PARAM_OFMARKER_EN: - len = sprintf(buf, "%d\n", conn->ofmarker_en); + len = sysfs_emit(buf, "%d\n", conn->ofmarker_en); break; case ISCSI_PARAM_EXP_STATSN: - len = sprintf(buf, "%u\n", conn->exp_statsn); + len = sysfs_emit(buf, "%u\n", conn->exp_statsn); break; case ISCSI_PARAM_PERSISTENT_PORT: - len = sprintf(buf, "%d\n", conn->persistent_port); + len = sysfs_emit(buf, "%d\n", conn->persistent_port); break; case ISCSI_PARAM_PERSISTENT_ADDRESS: - len = sprintf(buf, "%s\n", conn->persistent_address); + len = sysfs_emit(buf, "%s\n", conn->persistent_address); break; case ISCSI_PARAM_STATSN: - len = sprintf(buf, "%u\n", conn->statsn); + len = sysfs_emit(buf, "%u\n", conn->statsn); break; case ISCSI_PARAM_MAX_SEGMENT_SIZE: - len = sprintf(buf, "%u\n", conn->max_segment_size); + len = sysfs_emit(buf, "%u\n", conn->max_segment_size); break; case ISCSI_PARAM_KEEPALIVE_TMO: - len = sprintf(buf, "%u\n", conn->keepalive_tmo); + len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo); break; case ISCSI_PARAM_LOCAL_PORT: - len = sprintf(buf, "%u\n", conn->local_port); + len = sysfs_emit(buf, "%u\n", conn->local_port); break; case ISCSI_PARAM_TCP_TIMESTAMP_STAT: - len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat); + len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat); break; case ISCSI_PARAM_TCP_NAGLE_DISABLE: - len = sprintf(buf, "%u\n", conn->tcp_nagle_disable); + len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable); break; case ISCSI_PARAM_TCP_WSF_DISABLE: - len = sprintf(buf, "%u\n", conn->tcp_wsf_disable); + len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable); break; case ISCSI_PARAM_TCP_TIMER_SCALE: - len = sprintf(buf, "%u\n", conn->tcp_timer_scale); + len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale); break; case ISCSI_PARAM_TCP_TIMESTAMP_EN: - len = sprintf(buf, "%u\n", conn->tcp_timestamp_en); + len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en); break; case ISCSI_PARAM_IP_FRAGMENT_DISABLE: - len = sprintf(buf, "%u\n", conn->fragment_disable); + len = sysfs_emit(buf, "%u\n", conn->fragment_disable); break; case ISCSI_PARAM_IPV4_TOS: - len = sprintf(buf, "%u\n", conn->ipv4_tos); + len = sysfs_emit(buf, "%u\n", conn->ipv4_tos); break; case ISCSI_PARAM_IPV6_TC: - len = sprintf(buf, "%u\n", conn->ipv6_traffic_class); + len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class); break; case ISCSI_PARAM_IPV6_FLOW_LABEL: - len = sprintf(buf, "%u\n", conn->ipv6_flow_label); + len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label); break; case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6: - len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6); + len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6); break; case ISCSI_PARAM_TCP_XMIT_WSF: - len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf); + len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf); break; case ISCSI_PARAM_TCP_RECV_WSF: - len = sprintf(buf, "%u\n", conn->tcp_recv_wsf); + len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf); break; case ISCSI_PARAM_LOCAL_IPADDR: - len = sprintf(buf, "%s\n", conn->local_ipaddr); + len = sysfs_emit(buf, "%s\n", conn->local_ipaddr); break; default: return -ENOSYS; @@ -3615,13 +3610,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param, switch (param) { case ISCSI_HOST_PARAM_NETDEV_NAME: - len = sprintf(buf, "%s\n", ihost->netdev); + len = sysfs_emit(buf, "%s\n", ihost->netdev); break; case ISCSI_HOST_PARAM_HWADDRESS: - len = sprintf(buf, "%s\n", ihost->hwaddress); + len = sysfs_emit(buf, "%s\n", ihost->hwaddress); break; case ISCSI_HOST_PARAM_INITIATOR_NAME: - len = sprintf(buf, "%s\n", ihost->initiatorname); + len = sysfs_emit(buf, "%s\n", ihost->initiatorname); break; default: return -ENOSYS; diff --git a/drivers/scsi/libsas/sas_event.c b/drivers/scsi/libsas/sas_event.c index a1852f6c042b..ba266a17250a 100644 --- a/drivers/scsi/libsas/sas_event.c +++ b/drivers/scsi/libsas/sas_event.c @@ -109,7 +109,7 @@ void sas_enable_revalidation(struct sas_ha_struct *ha) sas_phy = container_of(port->phy_list.next, struct asd_sas_phy, port_phy_el); - ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); } mutex_unlock(&ha->disco_mutex); } @@ -131,18 +131,15 @@ static void sas_phy_event_worker(struct work_struct *work) sas_free_event(ev); } -static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event) +static int __sas_notify_port_event(struct asd_sas_phy *phy, + enum port_event event, + struct asd_sas_event *ev) { - struct asd_sas_event *ev; struct sas_ha_struct *ha = phy->ha; int ret; BUG_ON(event >= PORT_NUM_EVENTS); - ev = sas_alloc_event(phy); - if (!ev) - return -ENOMEM; - INIT_SAS_EVENT(ev, sas_port_event_worker, phy, event); ret = sas_queue_event(event, &ev->work, ha); @@ -152,18 +149,40 @@ static int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event) return ret; } -int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event) +int sas_notify_port_event_gfp(struct asd_sas_phy *phy, enum port_event event, + gfp_t gfp_flags) { struct asd_sas_event *ev; - struct sas_ha_struct *ha = phy->ha; - int ret; - BUG_ON(event >= PHY_NUM_EVENTS); + ev = sas_alloc_event_gfp(phy, gfp_flags); + if (!ev) + return -ENOMEM; + + return __sas_notify_port_event(phy, event, ev); +} +EXPORT_SYMBOL_GPL(sas_notify_port_event_gfp); + +int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event) +{ + struct asd_sas_event *ev; ev = sas_alloc_event(phy); if (!ev) return -ENOMEM; + return __sas_notify_port_event(phy, event, ev); +} +EXPORT_SYMBOL_GPL(sas_notify_port_event); + +static inline int __sas_notify_phy_event(struct asd_sas_phy *phy, + enum phy_event event, + struct asd_sas_event *ev) +{ + struct sas_ha_struct *ha = phy->ha; + int ret; + + BUG_ON(event >= PHY_NUM_EVENTS); + INIT_SAS_EVENT(ev, sas_phy_event_worker, phy, event); ret = sas_queue_event(event, &ev->work, ha); @@ -173,10 +192,27 @@ int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event) return ret; } -int sas_init_events(struct sas_ha_struct *sas_ha) +int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event, + gfp_t gfp_flags) { - sas_ha->notify_port_event = sas_notify_port_event; - sas_ha->notify_phy_event = sas_notify_phy_event; + struct asd_sas_event *ev; - return 0; + ev = sas_alloc_event_gfp(phy, gfp_flags); + if (!ev) + return -ENOMEM; + + return __sas_notify_phy_event(phy, event, ev); +} +EXPORT_SYMBOL_GPL(sas_notify_phy_event_gfp); + +int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event) +{ + struct asd_sas_event *ev; + + ev = sas_alloc_event(phy); + if (!ev) + return -ENOMEM; + + return __sas_notify_phy_event(phy, event, ev); } +EXPORT_SYMBOL_GPL(sas_notify_phy_event); diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c index 21c43b18d5d5..f8ae1f0f17d3 100644 --- a/drivers/scsi/libsas/sas_init.c +++ b/drivers/scsi/libsas/sas_init.c @@ -123,12 +123,6 @@ int sas_register_ha(struct sas_ha_struct *sas_ha) goto Undo_phys; } - error = sas_init_events(sas_ha); - if (error) { - pr_notice("couldn't start event thread:%d\n", error); - goto Undo_ports; - } - error = -ENOMEM; snprintf(name, sizeof(name), "%s_event_q", dev_name(sas_ha->dev)); sas_ha->event_q = create_singlethread_workqueue(name); @@ -590,16 +584,15 @@ sas_domain_attach_transport(struct sas_domain_function_template *dft) } EXPORT_SYMBOL_GPL(sas_domain_attach_transport); - -struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy) +static struct asd_sas_event *__sas_alloc_event(struct asd_sas_phy *phy, + gfp_t gfp_flags) { struct asd_sas_event *event; - gfp_t flags = in_interrupt() ? GFP_ATOMIC : GFP_KERNEL; struct sas_ha_struct *sas_ha = phy->ha; struct sas_internal *i = to_sas_internal(sas_ha->core.shost->transportt); - event = kmem_cache_zalloc(sas_event_cache, flags); + event = kmem_cache_zalloc(sas_event_cache, gfp_flags); if (!event) return NULL; @@ -610,7 +603,8 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy) if (cmpxchg(&phy->in_shutdown, 0, 1) == 0) { pr_notice("The phy%d bursting events, shut it down.\n", phy->id); - sas_notify_phy_event(phy, PHYE_SHUTDOWN); + sas_notify_phy_event_gfp(phy, PHYE_SHUTDOWN, + gfp_flags); } } else { /* Do not support PHY control, stop allocating events */ @@ -624,6 +618,17 @@ struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy) return event; } +struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy) +{ + return __sas_alloc_event(phy, in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); +} + +struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy, + gfp_t gfp_flags) +{ + return __sas_alloc_event(phy, gfp_flags); +} + void sas_free_event(struct asd_sas_event *event) { struct asd_sas_phy *phy = event->phy; diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h index 1f1d01901978..52e09c3e2b50 100644 --- a/drivers/scsi/libsas/sas_internal.h +++ b/drivers/scsi/libsas/sas_internal.h @@ -49,12 +49,13 @@ int sas_register_phys(struct sas_ha_struct *sas_ha); void sas_unregister_phys(struct sas_ha_struct *sas_ha); struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy); +struct asd_sas_event *sas_alloc_event_gfp(struct asd_sas_phy *phy, + gfp_t gfp_flags); void sas_free_event(struct asd_sas_event *event); int sas_register_ports(struct sas_ha_struct *sas_ha); void sas_unregister_ports(struct sas_ha_struct *sas_ha); -int sas_init_events(struct sas_ha_struct *sas_ha); void sas_disable_revalidation(struct sas_ha_struct *ha); void sas_enable_revalidation(struct sas_ha_struct *ha); void __sas_drain_work(struct sas_ha_struct *ha); @@ -78,6 +79,8 @@ int sas_smp_phy_control(struct domain_device *dev, int phy_id, int sas_smp_get_phy_events(struct sas_phy *phy); int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event); +int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event, + gfp_t flags); void sas_device_set_phy(struct domain_device *dev, struct sas_port *port); struct domain_device *sas_find_dev_by_rphy(struct sas_rphy *rphy); struct domain_device *sas_ex_to_ata(struct domain_device *ex_dev, int phy_id); diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index bc79a017e1a2..46a8f2d1d2b8 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -2421,7 +2421,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf, memset(dstbuf, 0, 33); size = (nbytes < 32) ? nbytes : 32; if (copy_from_user(dstbuf, buf, size)) - return 0; + return -EFAULT; if (dent == phba->debug_InjErrLBA) { if ((dstbuf[0] == 'o') && (dstbuf[1] == 'f') && @@ -2430,7 +2430,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf, } if ((tmp == 0) && (kstrtoull(dstbuf, 0, &tmp))) - return 0; + return -EINVAL; if (dent == phba->debug_writeGuard) phba->lpfc_injerr_wgrd_cnt = (uint32_t)tmp; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 2b6b5fc671fe..e5ace4a4f432 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -1145,13 +1145,14 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) struct lpfc_vport *vport = pmb->vport; LPFC_MBOXQ_t *sparam_mb; struct lpfc_dmabuf *sparam_mp; + u16 status = pmb->u.mb.mbxStatus; int rc; - if (pmb->u.mb.mbxStatus) - goto out; - mempool_free(pmb, phba->mbox_mem_pool); + if (status) + goto out; + /* don't perform discovery for SLI4 loopback diagnostic test */ if ((phba->sli_rev == LPFC_SLI_REV4) && !(phba->hba_flag & HBA_FCOE_MODE) && @@ -1214,12 +1215,10 @@ lpfc_mbx_cmpl_local_config_link(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb) out: lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, - "0306 CONFIG_LINK mbxStatus error x%x " - "HBA state x%x\n", - pmb->u.mb.mbxStatus, vport->port_state); -sparam_out: - mempool_free(pmb, phba->mbox_mem_pool); + "0306 CONFIG_LINK mbxStatus error x%x HBA state x%x\n", + status, vport->port_state); +sparam_out: lpfc_linkdown(phba); lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT, diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index 6e23dc3209fe..340d435ac0ce 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -7789,14 +7789,18 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc) ioc->pend_os_device_add_sz++; ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz, GFP_KERNEL); - if (!ioc->pend_os_device_add) + if (!ioc->pend_os_device_add) { + r = -ENOMEM; goto out_free_resources; + } ioc->device_remove_in_progress_sz = ioc->pend_os_device_add_sz; ioc->device_remove_in_progress = kzalloc(ioc->device_remove_in_progress_sz, GFP_KERNEL); - if (!ioc->device_remove_in_progress) + if (!ioc->device_remove_in_progress) { + r = -ENOMEM; goto out_free_resources; + } ioc->fwfault_debug = mpt3sas_fwfault_debug; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index c8b09a81834d..72439d6aa057 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -407,7 +407,7 @@ mpt3sas_get_port_by_id(struct MPT3SAS_ADAPTER *ioc, * And add this object to port_table_list. */ if (!ioc->multipath_on_hba) { - port = kzalloc(sizeof(struct hba_port), GFP_KERNEL); + port = kzalloc(sizeof(struct hba_port), GFP_ATOMIC); if (!port) return NULL; diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c index a920eced92ec..484e01428da2 100644 --- a/drivers/scsi/mvsas/mv_sas.c +++ b/drivers/scsi/mvsas/mv_sas.c @@ -216,11 +216,11 @@ void mvs_set_sas_addr(struct mvs_info *mvi, int port_id, u32 off_lo, MVS_CHIP_DISP->write_port_cfg_data(mvi, port_id, hi); } -static void mvs_bytes_dmaed(struct mvs_info *mvi, int i) +static void mvs_bytes_dmaed(struct mvs_info *mvi, int i, gfp_t gfp_flags) { struct mvs_phy *phy = &mvi->phy[i]; struct asd_sas_phy *sas_phy = &phy->sas_phy; - struct sas_ha_struct *sas_ha; + if (!phy->phy_attached) return; @@ -229,8 +229,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i) return; } - sas_ha = mvi->sas; - sas_ha->notify_phy_event(sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event_gfp(sas_phy, PHYE_OOB_DONE, gfp_flags); if (sas_phy->phy) { struct sas_phy *sphy = sas_phy->phy; @@ -262,8 +261,7 @@ static void mvs_bytes_dmaed(struct mvs_info *mvi, int i) sas_phy->frame_rcvd_size = phy->frame_rcvd_size; - mvi->sas->notify_port_event(sas_phy, - PORTE_BYTES_DMAED); + sas_notify_port_event_gfp(sas_phy, PORTE_BYTES_DMAED, gfp_flags); } void mvs_scan_start(struct Scsi_Host *shost) @@ -279,7 +277,7 @@ void mvs_scan_start(struct Scsi_Host *shost) for (j = 0; j < core_nr; j++) { mvi = ((struct mvs_prv_info *)sha->lldd_ha)->mvi[j]; for (i = 0; i < mvi->chip->n_phy; ++i) - mvs_bytes_dmaed(mvi, i); + mvs_bytes_dmaed(mvi, i, GFP_KERNEL); } mvs_prv->scan_finished = 1; } @@ -1880,7 +1878,6 @@ static void mvs_work_queue(struct work_struct *work) struct mvs_info *mvi = mwq->mvi; unsigned long flags; u32 phy_no = (unsigned long) mwq->data; - struct sas_ha_struct *sas_ha = mvi->sas; struct mvs_phy *phy = &mvi->phy[phy_no]; struct asd_sas_phy *sas_phy = &phy->sas_phy; @@ -1895,21 +1892,21 @@ static void mvs_work_queue(struct work_struct *work) if (!(tmp & PHY_READY_MASK)) { sas_phy_disconnected(sas_phy); mvs_phy_disconnected(phy); - sas_ha->notify_phy_event(sas_phy, - PHYE_LOSS_OF_SIGNAL); + sas_notify_phy_event_gfp(sas_phy, + PHYE_LOSS_OF_SIGNAL, GFP_ATOMIC); mv_dprintk("phy%d Removed Device\n", phy_no); } else { MVS_CHIP_DISP->detect_porttype(mvi, phy_no); mvs_update_phyinfo(mvi, phy_no, 1); - mvs_bytes_dmaed(mvi, phy_no); + mvs_bytes_dmaed(mvi, phy_no, GFP_ATOMIC); mvs_port_notify_formed(sas_phy, 0); mv_dprintk("phy%d Attached Device\n", phy_no); } } } else if (mwq->handler & EXP_BRCT_CHG) { phy->phy_event &= ~EXP_BRCT_CHG; - sas_ha->notify_port_event(sas_phy, - PORTE_BROADCAST_RCVD); + sas_notify_port_event_gfp(sas_phy, + PORTE_BROADCAST_RCVD, GFP_ATOMIC); mv_dprintk("phy%d Got Broadcast Change\n", phy_no); } list_del(&mwq->entry); @@ -2026,7 +2023,7 @@ void mvs_int_port(struct mvs_info *mvi, int phy_no, u32 events) mdelay(10); } - mvs_bytes_dmaed(mvi, phy_no); + mvs_bytes_dmaed(mvi, phy_no, GFP_ATOMIC); /* whether driver is going to handle hot plug */ if (phy->phy_event & PHY_PLUG_OUT) { mvs_port_notify_formed(&phy->sas_phy, 0); diff --git a/drivers/scsi/myrs.c b/drivers/scsi/myrs.c index 4adf9ded296a..329fd025c718 100644 --- a/drivers/scsi/myrs.c +++ b/drivers/scsi/myrs.c @@ -2273,12 +2273,12 @@ static void myrs_cleanup(struct myrs_hba *cs) if (cs->mmio_base) { cs->disable_intr(cs); iounmap(cs->mmio_base); + cs->mmio_base = NULL; } if (cs->irq) free_irq(cs->irq, cs); if (cs->io_addr) release_region(cs->io_addr, 0x80); - iounmap(cs->mmio_base); pci_set_drvdata(pdev, NULL); pci_disable_device(pdev); scsi_host_put(cs->host); diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c index c8d4d87c5473..ea43dff40a85 100644 --- a/drivers/scsi/pm8001/pm8001_hwi.c +++ b/drivers/scsi/pm8001/pm8001_hwi.c @@ -3038,8 +3038,8 @@ void pm8001_mpi_set_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) complete(pm8001_ha->nvmd_completion); pm8001_dbg(pm8001_ha, MSG, "Set nvm data complete!\n"); if ((dlen_status & NVMD_STAT) != 0) { - pm8001_dbg(pm8001_ha, FAIL, "Set nvm data error!\n"); - return; + pm8001_dbg(pm8001_ha, FAIL, "Set nvm data error %x\n", + dlen_status); } ccb->task = NULL; ccb->ccb_tag = 0xFFFFFFFF; @@ -3062,11 +3062,17 @@ pm8001_mpi_get_nvmd_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, MSG, "Get nvm data complete!\n"); if ((dlen_status & NVMD_STAT) != 0) { - pm8001_dbg(pm8001_ha, FAIL, "Get nvm data error!\n"); + pm8001_dbg(pm8001_ha, FAIL, "Get nvm data error %x\n", + dlen_status); complete(pm8001_ha->nvmd_completion); + /* We should free tag during failure also, the tag is not being + * freed by requesting path anywhere. + */ + ccb->task = NULL; + ccb->ccb_tag = 0xFFFFFFFF; + pm8001_tag_free(pm8001_ha, tag); return; } - if (ir_tds_bn_dps_das_nvm & IPMode) { /* indirect mode - IR bit set */ pm8001_dbg(pm8001_ha, MSG, "Get NVMD success, IR=1\n"); @@ -3179,7 +3185,7 @@ void pm8001_bytes_dmaed(struct pm8001_hba_info *pm8001_ha, int i) pm8001_dbg(pm8001_ha, MSG, "phy %d byte dmaded.\n", i); sas_phy->frame_rcvd_size = phy->frame_rcvd_size; - pm8001_ha->sas->notify_port_event(sas_phy, PORTE_BYTES_DMAED); + sas_notify_port_event(sas_phy, PORTE_BYTES_DMAED); } /* Get the link rate speed */ @@ -3293,7 +3299,6 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) u32 npip_portstate = le32_to_cpu(pPayload->npip_portstate); u8 portstate = (u8)(npip_portstate & 0x0000000F); struct pm8001_port *port = &pm8001_ha->port[port_id]; - struct sas_ha_struct *sas_ha = pm8001_ha->sas; struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; unsigned long flags; u8 deviceType = pPayload->sas_identify.dev_type; @@ -3337,7 +3342,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) else if (phy->identify.device_type != SAS_PHY_UNUSED) phy->identify.target_port_protocols = SAS_PROTOCOL_SMP; phy->sas_phy.oob_mode = SAS_OOB_MODE; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); memcpy(phy->frame_rcvd, &pPayload->sas_identify, sizeof(struct sas_identify_frame)-4); @@ -3369,7 +3374,6 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) u32 npip_portstate = le32_to_cpu(pPayload->npip_portstate); u8 portstate = (u8)(npip_portstate & 0x0000000F); struct pm8001_port *port = &pm8001_ha->port[port_id]; - struct sas_ha_struct *sas_ha = pm8001_ha->sas; struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; unsigned long flags; pm8001_dbg(pm8001_ha, DEVIO, "HW_EVENT_SATA_PHY_UP port id = %d, phy id = %d\n", @@ -3381,7 +3385,7 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) phy->phy_type |= PORT_TYPE_SATA; phy->phy_attached = 1; phy->sas_phy.oob_mode = SATA_OOB_MODE; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4), sizeof(struct dev_to_host_fis)); @@ -3728,11 +3732,11 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) break; case HW_EVENT_SATA_SPINUP_HOLD: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_SATA_SPINUP_HOLD\n"); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); + sas_notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); break; case HW_EVENT_PHY_DOWN: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_DOWN\n"); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); phy->phy_attached = 0; phy->phy_state = 0; hw_event_phy_down(pm8001_ha, piomb); @@ -3741,7 +3745,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_INVALID\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; /* the broadcast change primitive received, tell the LIBSAS this event to revalidate the sas domain*/ @@ -3752,20 +3756,20 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_PHY_ERROR: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_ERROR\n"); sas_phy_disconnected(&phy->sas_phy); phy->phy_attached = 0; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); break; case HW_EVENT_BROADCAST_EXP: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_BROADCAST_EXP\n"); spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_LINK_ERR_INVALID_DWORD: pm8001_dbg(pm8001_ha, MSG, @@ -3774,7 +3778,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) HW_EVENT_LINK_ERR_INVALID_DWORD, port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_LINK_ERR_DISPARITY_ERROR: pm8001_dbg(pm8001_ha, MSG, @@ -3784,7 +3788,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_LINK_ERR_CODE_VIOLATION: pm8001_dbg(pm8001_ha, MSG, @@ -3794,7 +3798,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_LINK_ERR_LOSS_OF_DWORD_SYNCH: pm8001_dbg(pm8001_ha, MSG, @@ -3804,7 +3808,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_MALFUNCTION: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_MALFUNCTION\n"); @@ -3814,7 +3818,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_SES; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_INBOUND_CRC_ERROR: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_INBOUND_CRC_ERROR\n"); @@ -3824,13 +3828,13 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) break; case HW_EVENT_HARD_RESET_RECEIVED: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_HARD_RESET_RECEIVED\n"); - sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET); + sas_notify_port_event(sas_phy, PORTE_HARD_RESET); break; case HW_EVENT_ID_FRAME_TIMEOUT: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_ID_FRAME_TIMEOUT\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_LINK_ERR_PHY_RESET_FAILED: pm8001_dbg(pm8001_ha, MSG, @@ -3840,20 +3844,20 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void* piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_PORT_RESET_TIMER_TMO: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_PORT_RECOVERY_TIMER_TMO: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RECOVERY_TIMER_TMO\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_PORT_RECOVER: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RECOVER\n"); diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c index d1e9dba2ef19..e21c6cfff4cb 100644 --- a/drivers/scsi/pm8001/pm8001_sas.c +++ b/drivers/scsi/pm8001/pm8001_sas.c @@ -158,7 +158,6 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, int rc = 0, phy_id = sas_phy->id; struct pm8001_hba_info *pm8001_ha = NULL; struct sas_phy_linkrates *rates; - struct sas_ha_struct *sas_ha; struct pm8001_phy *phy; DECLARE_COMPLETION_ONSTACK(completion); unsigned long flags; @@ -207,18 +206,16 @@ int pm8001_phy_control(struct asd_sas_phy *sas_phy, enum phy_func func, if (pm8001_ha->chip_id != chip_8001) { if (pm8001_ha->phy[phy_id].phy_state == PHY_STATE_LINK_UP_SPCV) { - sas_ha = pm8001_ha->sas; sas_phy_disconnected(&phy->sas_phy); - sas_ha->notify_phy_event(&phy->sas_phy, + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); phy->phy_attached = 0; } } else { if (pm8001_ha->phy[phy_id].phy_state == PHY_STATE_LINK_UP_SPC) { - sas_ha = pm8001_ha->sas; sas_phy_disconnected(&phy->sas_phy); - sas_ha->notify_phy_event(&phy->sas_phy, + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); phy->phy_attached = 0; } diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c index 6772b0924dac..f617177b7bb3 100644 --- a/drivers/scsi/pm8001/pm80xx_hwi.c +++ b/drivers/scsi/pm8001/pm80xx_hwi.c @@ -3243,7 +3243,6 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) u8 portstate = (u8)(phyid_npip_portstate & 0x0000000F); struct pm8001_port *port = &pm8001_ha->port[port_id]; - struct sas_ha_struct *sas_ha = pm8001_ha->sas; struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; unsigned long flags; u8 deviceType = pPayload->sas_identify.dev_type; @@ -3288,7 +3287,7 @@ hw_event_sas_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) else if (phy->identify.device_type != SAS_PHY_UNUSED) phy->identify.target_port_protocols = SAS_PROTOCOL_SMP; phy->sas_phy.oob_mode = SAS_OOB_MODE; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); memcpy(phy->frame_rcvd, &pPayload->sas_identify, sizeof(struct sas_identify_frame)-4); @@ -3322,7 +3321,6 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) u8 portstate = (u8)(phyid_npip_portstate & 0x0000000F); struct pm8001_port *port = &pm8001_ha->port[port_id]; - struct sas_ha_struct *sas_ha = pm8001_ha->sas; struct pm8001_phy *phy = &pm8001_ha->phy[phy_id]; unsigned long flags; pm8001_dbg(pm8001_ha, DEVIO, @@ -3336,7 +3334,7 @@ hw_event_sata_phy_up(struct pm8001_hba_info *pm8001_ha, void *piomb) phy->phy_type |= PORT_TYPE_SATA; phy->phy_attached = 1; phy->sas_phy.oob_mode = SATA_OOB_MODE; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_DONE); spin_lock_irqsave(&phy->sas_phy.frame_rcvd_lock, flags); memcpy(phy->frame_rcvd, ((u8 *)&pPayload->sata_fis - 4), sizeof(struct dev_to_host_fis)); @@ -3418,11 +3416,8 @@ hw_event_phy_down(struct pm8001_hba_info *pm8001_ha, void *piomb) break; } - if (port_sata && (portstate != PORT_IN_RESET)) { - struct sas_ha_struct *sas_ha = pm8001_ha->sas; - - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); - } + if (port_sata && (portstate != PORT_IN_RESET)) + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); } static int mpi_phy_start_resp(struct pm8001_hba_info *pm8001_ha, void *piomb) @@ -3520,7 +3515,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) break; case HW_EVENT_SATA_SPINUP_HOLD: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_SATA_SPINUP_HOLD\n"); - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); + sas_notify_phy_event(&phy->sas_phy, PHYE_SPINUP_HOLD); break; case HW_EVENT_PHY_DOWN: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_DOWN\n"); @@ -3536,7 +3531,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_INVALID\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; /* the broadcast change primitive received, tell the LIBSAS this event to revalidate the sas domain*/ @@ -3547,20 +3542,20 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_CHANGE; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_PHY_ERROR: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_ERROR\n"); sas_phy_disconnected(&phy->sas_phy); phy->phy_attached = 0; - sas_ha->notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); + sas_notify_phy_event(&phy->sas_phy, PHYE_OOB_ERROR); break; case HW_EVENT_BROADCAST_EXP: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_BROADCAST_EXP\n"); spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_EXP; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_LINK_ERR_INVALID_DWORD: pm8001_dbg(pm8001_ha, MSG, @@ -3597,7 +3592,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) spin_lock_irqsave(&sas_phy->sas_prim_lock, flags); sas_phy->sas_prim = HW_EVENT_BROADCAST_SES; spin_unlock_irqrestore(&sas_phy->sas_prim_lock, flags); - sas_ha->notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); + sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD); break; case HW_EVENT_INBOUND_CRC_ERROR: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_INBOUND_CRC_ERROR\n"); @@ -3607,13 +3602,13 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) break; case HW_EVENT_HARD_RESET_RECEIVED: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_HARD_RESET_RECEIVED\n"); - sas_ha->notify_port_event(sas_phy, PORTE_HARD_RESET); + sas_notify_port_event(sas_phy, PORTE_HARD_RESET); break; case HW_EVENT_ID_FRAME_TIMEOUT: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_ID_FRAME_TIMEOUT\n"); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_LINK_ERR_PHY_RESET_FAILED: pm8001_dbg(pm8001_ha, MSG, @@ -3623,7 +3618,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); break; case HW_EVENT_PORT_RESET_TIMER_TMO: pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PORT_RESET_TIMER_TMO\n"); @@ -3631,7 +3626,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) port_id, phy_id, 0, 0); sas_phy_disconnected(sas_phy); phy->phy_attached = 0; - sas_ha->notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); + sas_notify_port_event(sas_phy, PORTE_LINK_RESET_ERR); if (pm8001_ha->phy[phy_id].reset_completion) { pm8001_ha->phy[phy_id].port_reset_status = PORT_RESET_TMO; @@ -3648,7 +3643,7 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb) for (i = 0; i < pm8001_ha->chip->n_phy; i++) { if (port->wide_port_phymap & (1 << i)) { phy = &pm8001_ha->phy[i]; - sas_ha->notify_phy_event(&phy->sas_phy, + sas_notify_phy_event(&phy->sas_phy, PHYE_LOSS_OF_SIGNAL); port->wide_port_phymap &= ~(1 << i); } diff --git a/drivers/scsi/qedi/qedi_main.c b/drivers/scsi/qedi/qedi_main.c index 47ad64b06623..69c5b5ee2169 100644 --- a/drivers/scsi/qedi/qedi_main.c +++ b/drivers/scsi/qedi/qedi_main.c @@ -1675,6 +1675,7 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi) if (!qedi->global_queues[i]) { QEDI_ERR(&qedi->dbg_ctx, "Unable to allocation global queue %d.\n", i); + status = -ENOMEM; goto mem_alloc_failure; } diff --git a/drivers/scsi/qla2xxx/qla_dbg.c b/drivers/scsi/qla2xxx/qla_dbg.c index bb7431912d41..144a893e7335 100644 --- a/drivers/scsi/qla2xxx/qla_dbg.c +++ b/drivers/scsi/qla2xxx/qla_dbg.c @@ -202,6 +202,7 @@ qla24xx_dump_ram(struct qla_hw_data *ha, uint32_t addr, __be32 *ram, wrt_reg_word(®->mailbox0, MBC_DUMP_RISC_RAM_EXTENDED); wrt_reg_word(®->mailbox1, LSW(addr)); wrt_reg_word(®->mailbox8, MSW(addr)); + wrt_reg_word(®->mailbox10, 0); wrt_reg_word(®->mailbox2, MSW(LSD(dump_dma))); wrt_reg_word(®->mailbox3, LSW(LSD(dump_dma))); diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index d7d4ab65009c..510cbe2bf1e5 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -4276,7 +4276,8 @@ qla2x00_dump_ram(scsi_qla_host_t *vha, dma_addr_t req_dma, uint32_t addr, if (MSW(addr) || IS_FWI2_CAPABLE(vha->hw)) { mcp->mb[0] = MBC_DUMP_RISC_RAM_EXTENDED; mcp->mb[8] = MSW(addr); - mcp->out_mb = MBX_8|MBX_0; + mcp->mb[10] = 0; + mcp->out_mb = MBX_10|MBX_8|MBX_0; } else { mcp->mb[0] = MBC_DUMP_RISC_RAM; mcp->out_mb = MBX_0; diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c index 0d09480b66cd..17f541030f5b 100644 --- a/drivers/scsi/qla2xxx/qla_target.c +++ b/drivers/scsi/qla2xxx/qla_target.c @@ -3223,8 +3223,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) || (cmd->sess && cmd->sess->deleted)) { cmd->state = QLA_TGT_STATE_PROCESSED; - res = 0; - goto free; + return 0; } ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018, @@ -3235,8 +3234,9 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status, &full_req_cnt); - if (unlikely(res != 0)) - goto free; + if (unlikely(res != 0)) { + return res; + } spin_lock_irqsave(qpair->qp_lock_ptr, flags); @@ -3256,8 +3256,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, vha->flags.online, qla2x00_reset_active(vha), cmd->reset_count, qpair->chip_reset); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); - res = 0; - goto free; + return 0; } /* Does F/W have an IOCBs for this request */ @@ -3360,8 +3359,6 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type, qlt_unmap_sg(vha, cmd); spin_unlock_irqrestore(qpair->qp_lock_ptr, flags); -free: - vha->hw->tgt.tgt_ops->free_cmd(cmd); return res; } EXPORT_SYMBOL(qlt_xmit_response); diff --git a/drivers/scsi/qla2xxx/tcm_qla2xxx.c b/drivers/scsi/qla2xxx/tcm_qla2xxx.c index b55fc768a2a7..8b4890cdd4ca 100644 --- a/drivers/scsi/qla2xxx/tcm_qla2xxx.c +++ b/drivers/scsi/qla2xxx/tcm_qla2xxx.c @@ -644,7 +644,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; if (cmd->aborted) { /* Cmd can loop during Q-full. tcm_qla2xxx_aborted_task @@ -657,7 +656,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd) cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); - vha->hw->tgt.tgt_ops->free_cmd(cmd); return 0; } @@ -685,7 +683,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) { struct qla_tgt_cmd *cmd = container_of(se_cmd, struct qla_tgt_cmd, se_cmd); - struct scsi_qla_host *vha = cmd->vha; int xmit_type = QLA_TGT_XMIT_STATUS; if (cmd->aborted) { @@ -699,7 +696,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd) cmd, kref_read(&cmd->se_cmd.cmd_kref), cmd->se_cmd.transport_state, cmd->se_cmd.t_state, cmd->se_cmd.se_cmd_flags); - vha->hw->tgt.tgt_ops->free_cmd(cmd); return 0; } cmd->bufflen = se_cmd->data_length; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 2e68c0a87698..c53c3f9fa526 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -132,7 +132,11 @@ show_transport_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_internal *priv = dev_to_iscsi_internal(dev); - return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport)); + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + return sysfs_emit(buf, "%llu\n", + (unsigned long long)iscsi_handle(priv->iscsi_transport)); } static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL); @@ -142,7 +146,7 @@ show_transport_##name(struct device *dev, \ struct device_attribute *attr,char *buf) \ { \ struct iscsi_internal *priv = dev_to_iscsi_internal(dev); \ - return sprintf(buf, format"\n", priv->iscsi_transport->name); \ + return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\ } \ static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL); @@ -183,7 +187,7 @@ static ssize_t show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev); - return sprintf(buf, "%llu\n", (unsigned long long) ep->id); + return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id); } static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL); @@ -2883,6 +2887,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev) struct iscsi_cls_session *session; int err = 0, value = 0; + if (ev->u.set_param.len > PAGE_SIZE) + return -EINVAL; + session = iscsi_session_lookup(ev->u.set_param.sid); conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid); if (!conn || !session) @@ -3030,6 +3037,9 @@ iscsi_set_host_param(struct iscsi_transport *transport, if (!transport->set_host_param) return -ENOSYS; + if (ev->u.set_host_param.len > PAGE_SIZE) + return -EINVAL; + shost = scsi_host_lookup(ev->u.set_host_param.host_no); if (!shost) { printk(KERN_ERR "set_host_param could not find host no %u\n", @@ -3617,6 +3627,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) { int err = 0; u32 portid; + u32 pdu_len; struct iscsi_uevent *ev = nlmsg_data(nlh); struct iscsi_transport *transport = NULL; struct iscsi_internal *priv; @@ -3624,6 +3635,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) struct iscsi_cls_conn *conn; struct iscsi_endpoint *ep = NULL; + if (!netlink_capable(skb, CAP_SYS_ADMIN)) + return -EPERM; + if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE) *group = ISCSI_NL_GRP_UIP; else @@ -3756,6 +3770,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) err = -EINVAL; break; case ISCSI_UEVENT_SEND_PDU: + pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev); + + if ((ev->u.send_pdu.hdr_size > pdu_len) || + (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) { + err = -EINVAL; + break; + } + conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid); if (conn) { mutex_lock(&conn_mutex); @@ -3960,7 +3982,7 @@ static ssize_t show_conn_state(struct device *dev, conn->state < ARRAY_SIZE(connection_state_names)) state = connection_state_names[conn->state]; - return sprintf(buf, "%s\n", state); + return sysfs_emit(buf, "%s\n", state); } static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state, NULL); @@ -4188,7 +4210,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%s\n", iscsi_session_state_name(session->state)); + return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state)); } static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state, NULL); @@ -4197,7 +4219,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%d\n", session->creator); + return sysfs_emit(buf, "%d\n", session->creator); } static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator, NULL); @@ -4206,7 +4228,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr, char *buf) { struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent); - return sprintf(buf, "%d\n", session->target_id); + return sysfs_emit(buf, "%d\n", session->target_id); } static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO, show_priv_session_target_id, NULL); @@ -4219,8 +4241,8 @@ show_priv_session_##field(struct device *dev, \ struct iscsi_cls_session *session = \ iscsi_dev_to_session(dev->parent); \ if (session->field == -1) \ - return sprintf(buf, "off\n"); \ - return sprintf(buf, format"\n", session->field); \ + return sysfs_emit(buf, "off\n"); \ + return sysfs_emit(buf, format"\n", session->field); \ } #define iscsi_priv_session_attr_store(field) \ diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index a3d2d4bc4a3d..6a3a163b0706 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -707,9 +707,9 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, put_unaligned_be16(spsp, &cdb[2]); put_unaligned_be32(len, &cdb[6]); - ret = scsi_execute_req(sdev, cdb, - send ? DMA_TO_DEVICE : DMA_FROM_DEVICE, - buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL); + ret = scsi_execute(sdev, cdb, send ? DMA_TO_DEVICE : DMA_FROM_DEVICE, + buffer, len, NULL, NULL, SD_TIMEOUT, sdkp->max_retries, 0, + RQF_PM, NULL); return ret <= 0 ? ret : -EIO; } #endif /* CONFIG_BLK_SED_OPAL */ diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index cf07b7f93579..87a7274e4632 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -688,6 +688,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) unsigned int nr_zones = sdkp->rev_nr_zones; u32 max_append; int ret = 0; + unsigned int flags; /* * For all zoned disks, initialize zone append emulation data if not @@ -720,16 +721,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp) disk->queue->nr_zones == nr_zones) goto unlock; + flags = memalloc_noio_save(); sdkp->zone_blocks = zone_blocks; sdkp->nr_zones = nr_zones; - sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO); + sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL); if (!sdkp->rev_wp_offset) { ret = -ENOMEM; + memalloc_noio_restore(flags); goto unlock; } ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb); + memalloc_noio_restore(flags); kvfree(sdkp->rev_wp_offset); sdkp->rev_wp_offset = NULL; diff --git a/drivers/scsi/ufs/ufs-exynos.c b/drivers/scsi/ufs/ufs-exynos.c index a8770ff14588..267943a13a94 100644 --- a/drivers/scsi/ufs/ufs-exynos.c +++ b/drivers/scsi/ufs/ufs-exynos.c @@ -640,6 +640,11 @@ static int exynos_ufs_pre_pwr_mode(struct ufs_hba *hba, } } + /* setting for three timeout values for traffic class #0 */ + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), 8064); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), 28224); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), 20160); + return 0; out: return ret; @@ -1236,7 +1241,9 @@ struct exynos_ufs_drv_data exynos_ufs_drvs = { UFSHCI_QUIRK_BROKEN_HCE | UFSHCI_QUIRK_SKIP_RESET_INTR_AGGR | UFSHCD_QUIRK_BROKEN_OCS_FATAL_ERROR | - UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL, + UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL | + UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING | + UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE, .opts = EXYNOS_UFS_OPT_HAS_APB_CLK_CTRL | EXYNOS_UFS_OPT_BROKEN_AUTO_CLK_CTRL | EXYNOS_UFS_OPT_BROKEN_RX_SEL_IDX | diff --git a/drivers/scsi/ufs/ufs-mediatek.c b/drivers/scsi/ufs/ufs-mediatek.c index 80618af7c872..a981f261b304 100644 --- a/drivers/scsi/ufs/ufs-mediatek.c +++ b/drivers/scsi/ufs/ufs-mediatek.c @@ -661,6 +661,7 @@ static int ufs_mtk_init(struct ufs_hba *hba) /* Enable WriteBooster */ hba->caps |= UFSHCD_CAP_WB_EN; + hba->quirks |= UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL; hba->vps->wb_flush_threshold = UFS_WB_BUF_REMAIN_PERCENT(80); if (host->caps & UFS_MTK_CAP_DISABLE_AH8) @@ -910,7 +911,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm) if (!hba->vreg_info.vccq2 || !hba->vreg_info.vcc) return; - if (lpm & !hba->vreg_info.vcc->enabled) + if (lpm && !hba->vreg_info.vcc->enabled) regulator_set_mode(hba->vreg_info.vccq2->reg, REGULATOR_MODE_IDLE); else if (!lpm) diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c index 2206b1e4b774..e55201f64c10 100644 --- a/drivers/scsi/ufs/ufs-qcom.c +++ b/drivers/scsi/ufs/ufs-qcom.c @@ -253,12 +253,17 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba) { int ret = 0; struct ufs_qcom_host *host = ufshcd_get_variant(hba); + bool reenable_intr = false; if (!host->core_reset) { dev_warn(hba->dev, "%s: reset control not set\n", __func__); goto out; } + reenable_intr = hba->is_irq_enabled; + disable_irq(hba->irq); + hba->is_irq_enabled = false; + ret = reset_control_assert(host->core_reset); if (ret) { dev_err(hba->dev, "%s: core_reset assert failed, err = %d\n", @@ -280,6 +285,11 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba) usleep_range(1000, 1100); + if (reenable_intr) { + enable_irq(hba->irq); + hba->is_irq_enabled = true; + } + out: return ret; } diff --git a/drivers/scsi/ufs/ufs-sysfs.c b/drivers/scsi/ufs/ufs-sysfs.c index 08e72b7eef6a..50e90416262b 100644 --- a/drivers/scsi/ufs/ufs-sysfs.c +++ b/drivers/scsi/ufs/ufs-sysfs.c @@ -792,7 +792,8 @@ static ssize_t _pname##_show(struct device *dev, \ struct scsi_device *sdev = to_scsi_device(dev); \ struct ufs_hba *hba = shost_priv(sdev->host); \ u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun); \ - if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun)) \ + if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun, \ + _duname##_DESC_PARAM##_puname)) \ return -EINVAL; \ return ufs_sysfs_read_desc_param(hba, QUERY_DESC_IDN_##_duname, \ lun, _duname##_DESC_PARAM##_puname, buf, _size); \ diff --git a/drivers/scsi/ufs/ufs.h b/drivers/scsi/ufs/ufs.h index 14dfda735adf..580aa56965d0 100644 --- a/drivers/scsi/ufs/ufs.h +++ b/drivers/scsi/ufs/ufs.h @@ -552,13 +552,15 @@ struct ufs_dev_info { * @return: true if the lun has a matching unit descriptor, false otherwise */ static inline bool ufs_is_valid_unit_desc_lun(struct ufs_dev_info *dev_info, - u8 lun) + u8 lun, u8 param_offset) { if (!dev_info || !dev_info->max_lu_supported) { pr_err("Max General LU supported by UFS isn't initialized\n"); return false; } - + /* WB is available only for the logical unit from 0 to 7 */ + if (param_offset == UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS) + return lun < UFS_UPIU_MAX_WB_LUN_ID; return lun == UFS_UPIU_RPMB_WLUN || (lun < dev_info->max_lu_supported); } diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index fb32d122f2e3..16e1bd1aa49d 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -94,6 +94,8 @@ 16, 4, buf, __len, false); \ } while (0) +static bool early_suspend; + int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len, const char *prefix) { @@ -1182,19 +1184,30 @@ static int ufshcd_clock_scaling_prepare(struct ufs_hba *hba) */ ufshcd_scsi_block_requests(hba); down_write(&hba->clk_scaling_lock); - if (ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { + + if (!hba->clk_scaling.is_allowed || + ufshcd_wait_for_doorbell_clr(hba, DOORBELL_CLR_TOUT_US)) { ret = -EBUSY; up_write(&hba->clk_scaling_lock); ufshcd_scsi_unblock_requests(hba); + goto out; } + /* let's not get into low power until clock scaling is completed */ + ufshcd_hold(hba, false); + +out: return ret; } -static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) +static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba, bool writelock) { - up_write(&hba->clk_scaling_lock); + if (writelock) + up_write(&hba->clk_scaling_lock); + else + up_read(&hba->clk_scaling_lock); ufshcd_scsi_unblock_requests(hba); + ufshcd_release(hba); } /** @@ -1209,13 +1222,11 @@ static void ufshcd_clock_scaling_unprepare(struct ufs_hba *hba) static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) { int ret = 0; - - /* let's not get into low power until clock scaling is completed */ - ufshcd_hold(hba, false); + bool is_writelock = true; ret = ufshcd_clock_scaling_prepare(hba); if (ret) - goto out; + return ret; /* scale down the gear before scaling down clocks */ if (!scale_up) { @@ -1241,14 +1252,12 @@ static int ufshcd_devfreq_scale(struct ufs_hba *hba, bool scale_up) } /* Enable Write Booster if we have scaled up else disable it */ - up_write(&hba->clk_scaling_lock); + downgrade_write(&hba->clk_scaling_lock); + is_writelock = false; ufshcd_wb_ctrl(hba, scale_up); - down_write(&hba->clk_scaling_lock); out_unprepare: - ufshcd_clock_scaling_unprepare(hba); -out: - ufshcd_release(hba); + ufshcd_clock_scaling_unprepare(hba, is_writelock); return ret; } @@ -1522,7 +1531,7 @@ static ssize_t ufshcd_clkscale_enable_show(struct device *dev, { struct ufs_hba *hba = dev_get_drvdata(dev); - return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_scaling.is_allowed); + return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_scaling.is_enabled); } static ssize_t ufshcd_clkscale_enable_store(struct device *dev, @@ -1536,7 +1545,7 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, return -EINVAL; value = !!value; - if (value == hba->clk_scaling.is_allowed) + if (value == hba->clk_scaling.is_enabled) goto out; pm_runtime_get_sync(hba->dev); @@ -1545,7 +1554,7 @@ static ssize_t ufshcd_clkscale_enable_store(struct device *dev, cancel_work_sync(&hba->clk_scaling.suspend_work); cancel_work_sync(&hba->clk_scaling.resume_work); - hba->clk_scaling.is_allowed = value; + hba->clk_scaling.is_enabled = value; if (value) { ufshcd_resume_clkscaling(hba); @@ -1883,8 +1892,6 @@ static void ufshcd_init_clk_scaling(struct ufs_hba *hba) snprintf(wq_name, sizeof(wq_name), "ufs_clkscaling_%d", hba->host->host_no); hba->clk_scaling.workq = create_singlethread_workqueue(wq_name); - - ufshcd_clkscaling_init_sysfs(hba); } static void ufshcd_exit_clk_scaling(struct ufs_hba *hba) @@ -1892,6 +1899,8 @@ static void ufshcd_exit_clk_scaling(struct ufs_hba *hba) if (!ufshcd_is_clkscaling_supported(hba)) return; + if (hba->clk_scaling.enable_attr.attr.name) + device_remove_file(hba->dev, &hba->clk_scaling.enable_attr); destroy_workqueue(hba->clk_scaling.workq); ufshcd_devfreq_remove(hba); } @@ -1956,7 +1965,7 @@ static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba) if (!hba->clk_scaling.active_reqs++) queue_resume_work = true; - if (!hba->clk_scaling.is_allowed || hba->pm_op_in_progress) + if (!hba->clk_scaling.is_enabled || hba->pm_op_in_progress) return; if (queue_resume_work) @@ -3425,7 +3434,7 @@ static inline int ufshcd_read_unit_desc_param(struct ufs_hba *hba, * Unit descriptors are only available for general purpose LUs (LUN id * from 0 to 7) and RPMB Well known LU. */ - if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun)) + if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun, param_offset)) return -EOPNOTSUPP; return ufshcd_read_desc_param(hba, QUERY_DESC_IDN_UNIT, lun, @@ -4218,25 +4227,27 @@ static int ufshcd_change_power_mode(struct ufs_hba *hba, ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HSSERIES), pwr_mode->hs_rate); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), - DL_FC0ProtectionTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), - DL_TC0ReplayTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), - DL_AFC0ReqTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA3), - DL_FC1ProtectionTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA4), - DL_TC1ReplayTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA5), - DL_AFC1ReqTimeOutVal_Default); - - ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalFC0ProtectionTimeOutVal), - DL_FC0ProtectionTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalTC0ReplayTimeOutVal), - DL_TC0ReplayTimeOutVal_Default); - ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalAFC0ReqTimeOutVal), - DL_AFC0ReqTimeOutVal_Default); + if (!(hba->quirks & UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING)) { + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA0), + DL_FC0ProtectionTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA1), + DL_TC0ReplayTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA2), + DL_AFC0ReqTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA3), + DL_FC1ProtectionTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA4), + DL_TC1ReplayTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(PA_PWRMODEUSERDATA5), + DL_AFC1ReqTimeOutVal_Default); + + ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalFC0ProtectionTimeOutVal), + DL_FC0ProtectionTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalTC0ReplayTimeOutVal), + DL_TC0ReplayTimeOutVal_Default); + ufshcd_dme_set(hba, UIC_ARG_MIB(DME_LocalAFC0ReqTimeOutVal), + DL_AFC0ReqTimeOutVal_Default); + } ret = ufshcd_uic_change_pwr_mode(hba, pwr_mode->pwr_rx << 4 | pwr_mode->pwr_tx); @@ -4827,6 +4838,8 @@ static int ufshcd_slave_configure(struct scsi_device *sdev) struct request_queue *q = sdev->request_queue; blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1); + if (hba->quirks & UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE) + blk_queue_update_dma_alignment(q, PAGE_SIZE - 1); if (ufshcd_is_rpm_autosuspend_allowed(hba)) sdev->rpm_autosuspend = 1; @@ -5738,18 +5751,24 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba) ufshcd_vops_resume(hba, pm_op); } else { ufshcd_hold(hba, false); - if (hba->clk_scaling.is_allowed) { + if (hba->clk_scaling.is_enabled) { cancel_work_sync(&hba->clk_scaling.suspend_work); cancel_work_sync(&hba->clk_scaling.resume_work); ufshcd_suspend_clkscaling(hba); } + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = false; + up_write(&hba->clk_scaling_lock); } } static void ufshcd_err_handling_unprepare(struct ufs_hba *hba) { ufshcd_release(hba); - if (hba->clk_scaling.is_allowed) + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = true; + up_write(&hba->clk_scaling_lock); + if (hba->clk_scaling.is_enabled) ufshcd_resume_clkscaling(hba); pm_runtime_put(hba->dev); } @@ -7735,12 +7754,14 @@ static int ufshcd_add_lus(struct ufs_hba *hba) sizeof(struct ufs_pa_layer_attr)); hba->clk_scaling.saved_pwr_info.is_valid = true; if (!hba->devfreq) { + hba->clk_scaling.is_allowed = true; ret = ufshcd_devfreq_init(hba); if (ret) goto out; - } - hba->clk_scaling.is_allowed = true; + hba->clk_scaling.is_enabled = true; + ufshcd_clkscaling_init_sysfs(hba); + } } ufs_bsg_probe(hba); @@ -8655,11 +8676,14 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_hold(hba, false); hba->clk_gating.is_suspended = true; - if (hba->clk_scaling.is_allowed) { + if (hba->clk_scaling.is_enabled) { cancel_work_sync(&hba->clk_scaling.suspend_work); cancel_work_sync(&hba->clk_scaling.resume_work); ufshcd_suspend_clkscaling(hba); } + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = false; + up_write(&hba->clk_scaling_lock); if (req_dev_pwr_mode == UFS_ACTIVE_PWR_MODE && req_link_state == UIC_LINK_ACTIVE_STATE) { @@ -8756,8 +8780,6 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) goto out; set_link_active: - if (hba->clk_scaling.is_allowed) - ufshcd_resume_clkscaling(hba); ufshcd_vreg_set_hpm(hba); /* * Device hardware reset is required to exit DeepSleep. Also, for @@ -8781,7 +8803,10 @@ static int ufshcd_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op) if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE)) ufshcd_disable_auto_bkops(hba); enable_gating: - if (hba->clk_scaling.is_allowed) + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = true; + up_write(&hba->clk_scaling_lock); + if (hba->clk_scaling.is_enabled) ufshcd_resume_clkscaling(hba); hba->clk_gating.is_suspended = false; hba->dev_info.b_rpm_dev_flush_capable = false; @@ -8885,7 +8910,10 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) hba->clk_gating.is_suspended = false; - if (hba->clk_scaling.is_allowed) + down_write(&hba->clk_scaling_lock); + hba->clk_scaling.is_allowed = true; + up_write(&hba->clk_scaling_lock); + if (hba->clk_scaling.is_enabled) ufshcd_resume_clkscaling(hba); /* Enable Auto-Hibernate if configured */ @@ -8911,8 +8939,6 @@ static int ufshcd_resume(struct ufs_hba *hba, enum ufs_pm_op pm_op) ufshcd_vreg_set_lpm(hba); disable_irq_and_vops_clks: ufshcd_disable_irq(hba); - if (hba->clk_scaling.is_allowed) - ufshcd_suspend_clkscaling(hba); ufshcd_setup_clocks(hba, false); if (ufshcd_is_clkgating_allowed(hba)) { hba->clk_gating.state = CLKS_OFF; @@ -8939,8 +8965,14 @@ int ufshcd_system_suspend(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); + if (!hba) { + early_suspend = true; + return 0; + } + down(&hba->eh_sem); - if (!hba || !hba->is_powered) + + if (!hba->is_powered) return 0; if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) == @@ -8989,9 +9021,12 @@ int ufshcd_system_resume(struct ufs_hba *hba) int ret = 0; ktime_t start = ktime_get(); - if (!hba) { - up(&hba->eh_sem); + if (!hba) return -EINVAL; + + if (unlikely(early_suspend)) { + early_suspend = false; + down(&hba->eh_sem); } if (!hba->is_powered || pm_runtime_suspended(hba->dev)) @@ -9140,8 +9175,6 @@ void ufshcd_remove(struct ufs_hba *hba) ufshcd_exit_clk_scaling(hba); ufshcd_exit_clk_gating(hba); - if (ufshcd_is_clkscaling_supported(hba)) - device_remove_file(hba->dev, &hba->clk_scaling.enable_attr); ufshcd_hba_exit(hba); } EXPORT_SYMBOL_GPL(ufshcd_remove); diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h index aa9ea3552323..7d0b00f23761 100644 --- a/drivers/scsi/ufs/ufshcd.h +++ b/drivers/scsi/ufs/ufshcd.h @@ -419,7 +419,10 @@ struct ufs_saved_pwr_info { * @suspend_work: worker to suspend devfreq * @resume_work: worker to resume devfreq * @min_gear: lowest HS gear to scale down to - * @is_allowed: tracks if scaling is currently allowed or not + * @is_enabled: tracks if scaling is currently enabled or not, controlled by + clkscale_enable sysfs node + * @is_allowed: tracks if scaling is currently allowed or not, used to block + clock scaling which is not invoked from devfreq governor * @is_busy_started: tracks if busy period has started or not * @is_suspended: tracks if devfreq is suspended or not */ @@ -434,6 +437,7 @@ struct ufs_clk_scaling { struct work_struct suspend_work; struct work_struct resume_work; u32 min_gear; + bool is_enabled; bool is_allowed; bool is_busy_started; bool is_suspended; @@ -551,6 +555,16 @@ enum ufshcd_quirks { */ UFSHCI_QUIRK_SKIP_MANUAL_WB_FLUSH_CTRL = 1 << 12, + /* + * This quirk needs to disable unipro timeout values + * before power mode change + */ + UFSHCD_QUIRK_SKIP_DEF_UNIPRO_TIMEOUT_SETTING = 1 << 13, + + /* + * This quirk allows only sg entries aligned with page size. + */ + UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE = 1 << 14, }; enum ufshcd_caps { diff --git a/drivers/soc/aspeed/aspeed-lpc-snoop.c b/drivers/soc/aspeed/aspeed-lpc-snoop.c index 682ba0eb4eba..20acac6342ef 100644 --- a/drivers/soc/aspeed/aspeed-lpc-snoop.c +++ b/drivers/soc/aspeed/aspeed-lpc-snoop.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -67,6 +68,7 @@ struct aspeed_lpc_snoop_channel { struct aspeed_lpc_snoop { struct regmap *regmap; int irq; + struct clk *clk; struct aspeed_lpc_snoop_channel chan[NUM_SNOOP_CHANNELS]; }; @@ -282,22 +284,42 @@ static int aspeed_lpc_snoop_probe(struct platform_device *pdev) return -ENODEV; } + lpc_snoop->clk = devm_clk_get(dev, NULL); + if (IS_ERR(lpc_snoop->clk)) { + rc = PTR_ERR(lpc_snoop->clk); + if (rc != -EPROBE_DEFER) + dev_err(dev, "couldn't get clock\n"); + return rc; + } + rc = clk_prepare_enable(lpc_snoop->clk); + if (rc) { + dev_err(dev, "couldn't enable clock\n"); + return rc; + } + rc = aspeed_lpc_snoop_config_irq(lpc_snoop, pdev); if (rc) - return rc; + goto err; rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 0, port); if (rc) - return rc; + goto err; /* Configuration of 2nd snoop channel port is optional */ if (of_property_read_u32_index(dev->of_node, "snoop-ports", 1, &port) == 0) { rc = aspeed_lpc_enable_snoop(lpc_snoop, dev, 1, port); - if (rc) + if (rc) { aspeed_lpc_disable_snoop(lpc_snoop, 0); + goto err; + } } + return 0; + +err: + clk_disable_unprepare(lpc_snoop->clk); + return rc; } @@ -309,6 +331,8 @@ static int aspeed_lpc_snoop_remove(struct platform_device *pdev) aspeed_lpc_disable_snoop(lpc_snoop, 0); aspeed_lpc_disable_snoop(lpc_snoop, 1); + clk_disable_unprepare(lpc_snoop->clk); + return 0; } diff --git a/drivers/soc/aspeed/aspeed-socinfo.c b/drivers/soc/aspeed/aspeed-socinfo.c index 773930e0cb10..e3215f826d17 100644 --- a/drivers/soc/aspeed/aspeed-socinfo.c +++ b/drivers/soc/aspeed/aspeed-socinfo.c @@ -25,6 +25,7 @@ static struct { /* AST2600 */ { "AST2600", 0x05000303 }, { "AST2620", 0x05010203 }, + { "AST2605", 0x05030103 }, }; static const char *siliconid_to_name(u32 siliconid) @@ -43,14 +44,30 @@ static const char *siliconid_to_name(u32 siliconid) static const char *siliconid_to_rev(u32 siliconid) { unsigned int rev = (siliconid >> 16) & 0xff; - - switch (rev) { - case 0: - return "A0"; - case 1: - return "A1"; - case 3: - return "A2"; + unsigned int gen = (siliconid >> 24) & 0xff; + + if (gen < 0x5) { + /* AST2500 and below */ + switch (rev) { + case 0: + return "A0"; + case 1: + return "A1"; + case 3: + return "A2"; + } + } else { + /* AST2600 */ + switch (rev) { + case 0: + return "A0"; + case 1: + return "A1"; + case 2: + return "A2"; + case 3: + return "A3"; + } } return "??"; diff --git a/drivers/soc/qcom/ocmem.c b/drivers/soc/qcom/ocmem.c index 7f9e9944d1ea..f1875dc31ae2 100644 --- a/drivers/soc/qcom/ocmem.c +++ b/drivers/soc/qcom/ocmem.c @@ -189,6 +189,7 @@ struct ocmem *of_get_ocmem(struct device *dev) { struct platform_device *pdev; struct device_node *devnode; + struct ocmem *ocmem; devnode = of_parse_phandle(dev->of_node, "sram", 0); if (!devnode || !devnode->parent) { @@ -202,7 +203,12 @@ struct ocmem *of_get_ocmem(struct device *dev) return ERR_PTR(-EPROBE_DEFER); } - return platform_get_drvdata(pdev); + ocmem = platform_get_drvdata(pdev); + if (!ocmem) { + dev_err(dev, "Cannot get ocmem\n"); + return ERR_PTR(-ENODEV); + } + return ocmem; } EXPORT_SYMBOL(of_get_ocmem); diff --git a/drivers/soc/qcom/socinfo.c b/drivers/soc/qcom/socinfo.c index d21530d24253..6daa3c5771d1 100644 --- a/drivers/soc/qcom/socinfo.c +++ b/drivers/soc/qcom/socinfo.c @@ -286,7 +286,7 @@ static int qcom_show_pmic_model(struct seq_file *seq, void *p) if (model < 0) return -EINVAL; - if (model <= ARRAY_SIZE(pmic_models) && pmic_models[model]) + if (model < ARRAY_SIZE(pmic_models) && pmic_models[model]) seq_printf(seq, "%s\n", pmic_models[model]); else seq_printf(seq, "unknown (%d)\n", model); diff --git a/drivers/soc/samsung/exynos-asv.c b/drivers/soc/samsung/exynos-asv.c index 8abf4dfaa5c5..5daeadc36382 100644 --- a/drivers/soc/samsung/exynos-asv.c +++ b/drivers/soc/samsung/exynos-asv.c @@ -119,11 +119,6 @@ static int exynos_asv_probe(struct platform_device *pdev) u32 product_id = 0; int ret, i; - cpu_dev = get_cpu_device(0); - ret = dev_pm_opp_get_opp_count(cpu_dev); - if (ret < 0) - return -EPROBE_DEFER; - asv = devm_kzalloc(&pdev->dev, sizeof(*asv), GFP_KERNEL); if (!asv) return -ENOMEM; @@ -134,7 +129,13 @@ static int exynos_asv_probe(struct platform_device *pdev) return PTR_ERR(asv->chipid_regmap); } - regmap_read(asv->chipid_regmap, EXYNOS_CHIPID_REG_PRO_ID, &product_id); + ret = regmap_read(asv->chipid_regmap, EXYNOS_CHIPID_REG_PRO_ID, + &product_id); + if (ret < 0) { + dev_err(&pdev->dev, "Cannot read revision from ChipID: %d\n", + ret); + return -ENODEV; + } switch (product_id & EXYNOS_MASK) { case 0xE5422000: @@ -144,6 +145,11 @@ static int exynos_asv_probe(struct platform_device *pdev) return -ENODEV; } + cpu_dev = get_cpu_device(0); + ret = dev_pm_opp_get_opp_count(cpu_dev); + if (ret < 0) + return -EPROBE_DEFER; + ret = of_property_read_u32(pdev->dev.of_node, "samsung,asv-bin", &asv->of_bin); if (ret < 0) diff --git a/drivers/soc/ti/omap_prm.c b/drivers/soc/ti/omap_prm.c index bf1468e5bccb..51143a68a889 100644 --- a/drivers/soc/ti/omap_prm.c +++ b/drivers/soc/ti/omap_prm.c @@ -332,7 +332,7 @@ static const struct omap_prm_data dra7_prm_data[] = { { .name = "l3init", .base = 0x4ae07300, .pwrstctrl = 0x0, .pwrstst = 0x4, .dmap = &omap_prm_alwon, - .rstctrl = 0x10, .rstst = 0x14, .rstmap = rst_map_012, + .rstctrl = 0x10, .rstst = 0x14, .rstmap = rst_map_01, .clkdm_name = "pcie" }, { @@ -830,8 +830,12 @@ static int omap_reset_deassert(struct reset_controller_dev *rcdev, reset->prm->data->name, id); exit: - if (reset->clkdm) + if (reset->clkdm) { + /* At least dra7 iva needs a delay before clkdm idle */ + if (has_rstst) + udelay(1); pdata->clkdm_allow_idle(reset->clkdm); + } return ret; } diff --git a/drivers/soc/ti/pm33xx.c b/drivers/soc/ti/pm33xx.c index 64f3e3105540..7bab4bbaf02d 100644 --- a/drivers/soc/ti/pm33xx.c +++ b/drivers/soc/ti/pm33xx.c @@ -535,7 +535,7 @@ static int am33xx_pm_probe(struct platform_device *pdev) ret = am33xx_push_sram_idle(); if (ret) - goto err_free_sram; + goto err_unsetup_rtc; am33xx_pm_set_ipc_ops(); @@ -575,6 +575,9 @@ static int am33xx_pm_probe(struct platform_device *pdev) err_pm_runtime_disable: pm_runtime_disable(dev); wkup_m3_ipc_put(m3_ipc); +err_unsetup_rtc: + iounmap(rtc_base_virt); + clk_put(rtc_fck); err_free_sram: am33xx_pm_free_sram(); pm33xx_dev = NULL; diff --git a/drivers/soundwire/bus.c b/drivers/soundwire/bus.c index d1e8c3a54976..662b3b030246 100644 --- a/drivers/soundwire/bus.c +++ b/drivers/soundwire/bus.c @@ -405,10 +405,11 @@ sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) return sdw_transfer(slave->bus, &msg); } -static int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value) +int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value) { return sdw_nwrite_no_pm(slave, addr, 1, &value); } +EXPORT_SYMBOL(sdw_write_no_pm); static int sdw_bread_no_pm(struct sdw_bus *bus, u16 dev_num, u32 addr) @@ -476,8 +477,7 @@ int sdw_bwrite_no_pm_unlocked(struct sdw_bus *bus, u16 dev_num, u32 addr, u8 val } EXPORT_SYMBOL(sdw_bwrite_no_pm_unlocked); -static int -sdw_read_no_pm(struct sdw_slave *slave, u32 addr) +int sdw_read_no_pm(struct sdw_slave *slave, u32 addr) { u8 buf; int ret; @@ -488,6 +488,19 @@ sdw_read_no_pm(struct sdw_slave *slave, u32 addr) else return buf; } +EXPORT_SYMBOL(sdw_read_no_pm); + +static int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val) +{ + int tmp; + + tmp = sdw_read_no_pm(slave, addr); + if (tmp < 0) + return tmp; + + tmp = (tmp & ~mask) | val; + return sdw_write_no_pm(slave, addr, tmp); +} /** * sdw_nread() - Read "n" contiguous SDW Slave registers @@ -500,16 +513,16 @@ int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) { int ret; - ret = pm_runtime_get_sync(slave->bus->dev); + ret = pm_runtime_get_sync(&slave->dev); if (ret < 0 && ret != -EACCES) { - pm_runtime_put_noidle(slave->bus->dev); + pm_runtime_put_noidle(&slave->dev); return ret; } ret = sdw_nread_no_pm(slave, addr, count, val); - pm_runtime_mark_last_busy(slave->bus->dev); - pm_runtime_put(slave->bus->dev); + pm_runtime_mark_last_busy(&slave->dev); + pm_runtime_put(&slave->dev); return ret; } @@ -526,16 +539,16 @@ int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val) { int ret; - ret = pm_runtime_get_sync(slave->bus->dev); + ret = pm_runtime_get_sync(&slave->dev); if (ret < 0 && ret != -EACCES) { - pm_runtime_put_noidle(slave->bus->dev); + pm_runtime_put_noidle(&slave->dev); return ret; } ret = sdw_nwrite_no_pm(slave, addr, count, val); - pm_runtime_mark_last_busy(slave->bus->dev); - pm_runtime_put(slave->bus->dev); + pm_runtime_mark_last_busy(&slave->dev); + pm_runtime_put(&slave->dev); return ret; } @@ -1210,7 +1223,7 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave) } scale_index++; - ret = sdw_write(slave, SDW_SCP_BUS_CLOCK_BASE, base); + ret = sdw_write_no_pm(slave, SDW_SCP_BUS_CLOCK_BASE, base); if (ret < 0) { dev_err(&slave->dev, "SDW_SCP_BUS_CLOCK_BASE write failed:%d\n", ret); @@ -1218,13 +1231,13 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave) } /* initialize scale for both banks */ - ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index); + ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index); if (ret < 0) { dev_err(&slave->dev, "SDW_SCP_BUSCLOCK_SCALE_B0 write failed:%d\n", ret); return ret; } - ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index); + ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index); if (ret < 0) dev_err(&slave->dev, "SDW_SCP_BUSCLOCK_SCALE_B1 write failed:%d\n", ret); @@ -1256,7 +1269,7 @@ static int sdw_initialize_slave(struct sdw_slave *slave) val = slave->prop.scp_int1_mask; /* Enable SCP interrupts */ - ret = sdw_update(slave, SDW_SCP_INTMASK1, val, val); + ret = sdw_update_no_pm(slave, SDW_SCP_INTMASK1, val, val); if (ret < 0) { dev_err(slave->bus->dev, "SDW_SCP_INTMASK1 write failed:%d\n", ret); @@ -1271,7 +1284,7 @@ static int sdw_initialize_slave(struct sdw_slave *slave) val = prop->dp0_prop->imp_def_interrupts; val |= SDW_DP0_INT_PORT_READY | SDW_DP0_INT_BRA_FAILURE; - ret = sdw_update(slave, SDW_DP0_INTMASK, val, val); + ret = sdw_update_no_pm(slave, SDW_DP0_INTMASK, val, val); if (ret < 0) dev_err(slave->bus->dev, "SDW_DP0_INTMASK read failed:%d\n", ret); @@ -1440,7 +1453,7 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave) ret = pm_runtime_get_sync(&slave->dev); if (ret < 0 && ret != -EACCES) { dev_err(&slave->dev, "Failed to resume device: %d\n", ret); - pm_runtime_put_noidle(slave->bus->dev); + pm_runtime_put_noidle(&slave->dev); return ret; } diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c index 9fa55164354a..580660599f46 100644 --- a/drivers/soundwire/cadence_master.c +++ b/drivers/soundwire/cadence_master.c @@ -484,10 +484,10 @@ cdns_fill_msg_resp(struct sdw_cdns *cdns, if (!(cdns->response_buf[i] & CDNS_MCP_RESP_ACK)) { no_ack = 1; dev_dbg_ratelimited(cdns->dev, "Msg Ack not received\n"); - if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) { - nack = 1; - dev_err_ratelimited(cdns->dev, "Msg NACK received\n"); - } + } + if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) { + nack = 1; + dev_err_ratelimited(cdns->dev, "Msg NACK received\n"); } } diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c index cabdadb09a1b..bc8520eb385e 100644 --- a/drivers/soundwire/intel_init.c +++ b/drivers/soundwire/intel_init.c @@ -405,11 +405,12 @@ int sdw_intel_acpi_scan(acpi_handle *parent_handle, { acpi_status status; + info->handle = NULL; status = acpi_walk_namespace(ACPI_TYPE_DEVICE, parent_handle, 1, sdw_intel_acpi_cb, NULL, info, NULL); - if (ACPI_FAILURE(status)) + if (ACPI_FAILURE(status) || info->handle == NULL) return -ENODEV; return sdw_intel_scan_controller(info); diff --git a/drivers/spi/spi-atmel.c b/drivers/spi/spi-atmel.c index 948396b382d7..f429436082af 100644 --- a/drivers/spi/spi-atmel.c +++ b/drivers/spi/spi-atmel.c @@ -1590,7 +1590,7 @@ static int atmel_spi_probe(struct platform_device *pdev) if (ret == 0) { as->use_dma = true; } else if (ret == -EPROBE_DEFER) { - return ret; + goto out_unmap_regs; } } else if (as->caps.has_pdc_support) { as->use_pdc = true; diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index ba7d40c2922f..2e1255bf1b42 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -461,7 +461,7 @@ static int cqspi_read_setup(struct cqspi_flash_pdata *f_pdata, /* Setup dummy clock cycles */ dummy_clk = op->dummy.nbytes * 8; if (dummy_clk > CQSPI_DUMMY_CLKS_MAX) - dummy_clk = CQSPI_DUMMY_CLKS_MAX; + return -EOPNOTSUPP; if (dummy_clk) reg |= (dummy_clk & CQSPI_REG_RD_INSTR_DUMMY_MASK) @@ -1198,6 +1198,7 @@ static int cqspi_probe(struct platform_device *pdev) cqspi = spi_master_get_devdata(master); cqspi->pdev = pdev; + platform_set_drvdata(pdev, cqspi); /* Obtain configuration from OF. */ ret = cqspi_of_get_pdata(cqspi); diff --git a/drivers/spi/spi-dw-bt1.c b/drivers/spi/spi-dw-bt1.c index 4aa8596fb1f2..5be6b7b80c21 100644 --- a/drivers/spi/spi-dw-bt1.c +++ b/drivers/spi/spi-dw-bt1.c @@ -84,7 +84,7 @@ static void dw_spi_bt1_dirmap_copy_from_map(void *to, void __iomem *from, size_t if (shift) { chunk = min_t(size_t, 4 - shift, len); data = readl_relaxed(from - shift); - memcpy(to, &data + shift, chunk); + memcpy(to, (char *)&data + shift, chunk); from += chunk; to += chunk; len -= chunk; diff --git a/drivers/spi/spi-fsl-spi.c b/drivers/spi/spi-fsl-spi.c index 6d8e0a05a535..e4a8d203f940 100644 --- a/drivers/spi/spi-fsl-spi.c +++ b/drivers/spi/spi-fsl-spi.c @@ -695,7 +695,7 @@ static void fsl_spi_cs_control(struct spi_device *spi, bool on) if (WARN_ON_ONCE(!pinfo->immr_spi_cs)) return; - iowrite32be(on ? SPI_BOOT_SEL_BIT : 0, pinfo->immr_spi_cs); + iowrite32be(on ? 0 : SPI_BOOT_SEL_BIT, pinfo->immr_spi_cs); } } diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 73ca821763d6..5dc4ea4b4450 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1685,7 +1685,7 @@ static int spi_imx_probe(struct platform_device *pdev) master->dev.of_node = pdev->dev.of_node; ret = spi_bitbang_start(&spi_imx->bitbang); if (ret) { - dev_err(&pdev->dev, "bitbang start failed with %d\n", ret); + dev_err_probe(&pdev->dev, ret, "bitbang start failed\n"); goto out_bitbang_start; } diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index f236e3034cf8..aafac128bb5f 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -21,7 +21,8 @@ enum { PORT_BSW1, PORT_BSW2, PORT_CE4100, - PORT_LPT, + PORT_LPT0, + PORT_LPT1, }; struct pxa_spi_info { @@ -57,8 +58,10 @@ static struct dw_dma_slave bsw1_rx_param = { .src_id = 7 }; static struct dw_dma_slave bsw2_tx_param = { .dst_id = 8 }; static struct dw_dma_slave bsw2_rx_param = { .src_id = 9 }; -static struct dw_dma_slave lpt_tx_param = { .dst_id = 0 }; -static struct dw_dma_slave lpt_rx_param = { .src_id = 1 }; +static struct dw_dma_slave lpt1_tx_param = { .dst_id = 0 }; +static struct dw_dma_slave lpt1_rx_param = { .src_id = 1 }; +static struct dw_dma_slave lpt0_tx_param = { .dst_id = 2 }; +static struct dw_dma_slave lpt0_rx_param = { .src_id = 3 }; static bool lpss_dma_filter(struct dma_chan *chan, void *param) { @@ -185,12 +188,19 @@ static struct pxa_spi_info spi_info_configs[] = { .num_chipselect = 1, .max_clk_rate = 50000000, }, - [PORT_LPT] = { + [PORT_LPT0] = { .type = LPSS_LPT_SSP, .port_id = 0, .setup = lpss_spi_setup, - .tx_param = &lpt_tx_param, - .rx_param = &lpt_rx_param, + .tx_param = &lpt0_tx_param, + .rx_param = &lpt0_rx_param, + }, + [PORT_LPT1] = { + .type = LPSS_LPT_SSP, + .port_id = 1, + .setup = lpss_spi_setup, + .tx_param = &lpt1_tx_param, + .rx_param = &lpt1_rx_param, }, }; @@ -285,8 +295,9 @@ static const struct pci_device_id pxa2xx_spi_pci_devices[] = { { PCI_VDEVICE(INTEL, 0x2290), PORT_BSW1 }, { PCI_VDEVICE(INTEL, 0x22ac), PORT_BSW2 }, { PCI_VDEVICE(INTEL, 0x2e6a), PORT_CE4100 }, - { PCI_VDEVICE(INTEL, 0x9ce6), PORT_LPT }, - { }, + { PCI_VDEVICE(INTEL, 0x9ce5), PORT_LPT0 }, + { PCI_VDEVICE(INTEL, 0x9ce6), PORT_LPT1 }, + { } }; MODULE_DEVICE_TABLE(pci, pxa2xx_spi_pci_devices); diff --git a/drivers/spi/spi-stm32.c b/drivers/spi/spi-stm32.c index 6017209c6d2f..53c4311cc6ab 100644 --- a/drivers/spi/spi-stm32.c +++ b/drivers/spi/spi-stm32.c @@ -928,8 +928,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) mask |= STM32H7_SPI_SR_RXP; if (!(sr & mask)) { - dev_dbg(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", - sr, ier); + dev_warn(spi->dev, "spurious IT (sr=0x%08x, ier=0x%08x)\n", + sr, ier); spin_unlock_irqrestore(&spi->lock, flags); return IRQ_NONE; } @@ -956,15 +956,8 @@ static irqreturn_t stm32h7_spi_irq_thread(int irq, void *dev_id) } if (sr & STM32H7_SPI_SR_OVR) { - dev_warn(spi->dev, "Overrun: received value discarded\n"); - if (!spi->cur_usedma && (spi->rx_buf && (spi->rx_len > 0))) - stm32h7_spi_read_rxfifo(spi, false); - /* - * If overrun is detected while using DMA, it means that - * something went wrong, so stop the current transfer - */ - if (spi->cur_usedma) - end = true; + dev_err(spi->dev, "Overrun: RX data lost\n"); + end = true; } if (sr & STM32H7_SPI_SR_EOT) { @@ -1677,6 +1670,10 @@ static int stm32_spi_transfer_one(struct spi_master *master, struct stm32_spi *spi = spi_master_get_devdata(master); int ret; + /* Don't do anything on 0 bytes transfers */ + if (transfer->len == 0) + return 0; + spi->tx_buf = transfer->tx_buf; spi->rx_buf = transfer->rx_buf; spi->tx_len = spi->tx_buf ? transfer->len : 0; diff --git a/drivers/spi/spi-synquacer.c b/drivers/spi/spi-synquacer.c index 8cdca6ab8098..ea706d9629cb 100644 --- a/drivers/spi/spi-synquacer.c +++ b/drivers/spi/spi-synquacer.c @@ -490,6 +490,10 @@ static void synquacer_spi_set_cs(struct spi_device *spi, bool enable) val &= ~(SYNQUACER_HSSPI_DMPSEL_CS_MASK << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT); val |= spi->chip_select << SYNQUACER_HSSPI_DMPSEL_CS_SHIFT; + + if (!enable) + val |= SYNQUACER_HSSPI_DMSTOP_STOP; + writel(val, sspi->regs + SYNQUACER_HSSPI_REG_DMSTART); } diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 720ab34784c1..ccca3a7409fa 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -1267,7 +1267,7 @@ static int spi_transfer_one_message(struct spi_controller *ctlr, ptp_read_system_prets(xfer->ptp_sts); } - if (xfer->tx_buf || xfer->rx_buf) { + if ((xfer->tx_buf || xfer->rx_buf) && xfer->len) { reinit_completion(&ctlr->xfer_completion); fallback_pio: diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index de844b412110..bbbd311eda03 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Copyright (c) 2012-2015, 2017, The Linux Foundation. All rights reserved. + * Copyright (c) 2012-2015, 2017, 2021, The Linux Foundation. All rights reserved. */ #include #include @@ -505,8 +505,7 @@ static void cleanup_irq(struct spmi_pmic_arb *pmic_arb, u16 apid, int id) static void periph_interrupt(struct spmi_pmic_arb *pmic_arb, u16 apid) { unsigned int irq; - u32 status; - int id; + u32 status, id; u8 sid = (pmic_arb->apid_data[apid].ppid >> 8) & 0xF; u8 per = pmic_arb->apid_data[apid].ppid & 0xFF; diff --git a/drivers/staging/comedi/drivers/addi_apci_1032.c b/drivers/staging/comedi/drivers/addi_apci_1032.c index 35b75f0c9200..81a246fbcc01 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1032.c +++ b/drivers/staging/comedi/drivers/addi_apci_1032.c @@ -260,6 +260,7 @@ static irqreturn_t apci1032_interrupt(int irq, void *d) struct apci1032_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; unsigned int ctrl; + unsigned short val; /* check interrupt is from this device */ if ((inl(devpriv->amcc_iobase + AMCC_OP_REG_INTCSR) & @@ -275,7 +276,8 @@ static irqreturn_t apci1032_interrupt(int irq, void *d) outl(ctrl & ~APCI1032_CTRL_INT_ENA, dev->iobase + APCI1032_CTRL_REG); s->state = inl(dev->iobase + APCI1032_STATUS_REG) & 0xffff; - comedi_buf_write_samples(s, &s->state, 1); + val = s->state; + comedi_buf_write_samples(s, &val, 1); comedi_handle_events(dev, s); /* enable the interrupt */ diff --git a/drivers/staging/comedi/drivers/addi_apci_1500.c b/drivers/staging/comedi/drivers/addi_apci_1500.c index 11efb21555e3..b04c15dcfb57 100644 --- a/drivers/staging/comedi/drivers/addi_apci_1500.c +++ b/drivers/staging/comedi/drivers/addi_apci_1500.c @@ -208,7 +208,7 @@ static irqreturn_t apci1500_interrupt(int irq, void *d) struct comedi_device *dev = d; struct apci1500_private *devpriv = dev->private; struct comedi_subdevice *s = dev->read_subdev; - unsigned int status = 0; + unsigned short status = 0; unsigned int val; val = inl(devpriv->amcc + AMCC_OP_REG_INTCSR); @@ -238,14 +238,14 @@ static irqreturn_t apci1500_interrupt(int irq, void *d) * * Mask Meaning * ---------- ------------------------------------------ - * 0x00000001 Event 1 has occurred - * 0x00000010 Event 2 has occurred - * 0x00000100 Counter/timer 1 has run down (not implemented) - * 0x00001000 Counter/timer 2 has run down (not implemented) - * 0x00010000 Counter 3 has run down (not implemented) - * 0x00100000 Watchdog has run down (not implemented) - * 0x01000000 Voltage error - * 0x10000000 Short-circuit error + * 0b00000001 Event 1 has occurred + * 0b00000010 Event 2 has occurred + * 0b00000100 Counter/timer 1 has run down (not implemented) + * 0b00001000 Counter/timer 2 has run down (not implemented) + * 0b00010000 Counter 3 has run down (not implemented) + * 0b00100000 Watchdog has run down (not implemented) + * 0b01000000 Voltage error + * 0b10000000 Short-circuit error */ comedi_buf_write_samples(s, &status, 1); comedi_handle_events(dev, s); diff --git a/drivers/staging/comedi/drivers/adv_pci1710.c b/drivers/staging/comedi/drivers/adv_pci1710.c index 692893c7e5c3..090607760be6 100644 --- a/drivers/staging/comedi/drivers/adv_pci1710.c +++ b/drivers/staging/comedi/drivers/adv_pci1710.c @@ -300,11 +300,11 @@ static int pci1710_ai_eoc(struct comedi_device *dev, static int pci1710_ai_read_sample(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int cur_chan, - unsigned int *val) + unsigned short *val) { const struct boardtype *board = dev->board_ptr; struct pci1710_private *devpriv = dev->private; - unsigned int sample; + unsigned short sample; unsigned int chan; sample = inw(dev->iobase + PCI171X_AD_DATA_REG); @@ -345,7 +345,7 @@ static int pci1710_ai_insn_read(struct comedi_device *dev, pci1710_ai_setup_chanlist(dev, s, &insn->chanspec, 1, 1); for (i = 0; i < insn->n; i++) { - unsigned int val; + unsigned short val; /* start conversion */ outw(0, dev->iobase + PCI171X_SOFTTRG_REG); @@ -395,7 +395,7 @@ static void pci1710_handle_every_sample(struct comedi_device *dev, { struct comedi_cmd *cmd = &s->async->cmd; unsigned int status; - unsigned int val; + unsigned short val; int ret; status = inw(dev->iobase + PCI171X_STATUS_REG); @@ -455,7 +455,7 @@ static void pci1710_handle_fifo(struct comedi_device *dev, } for (i = 0; i < devpriv->max_samples; i++) { - unsigned int val; + unsigned short val; int ret; ret = pci1710_ai_read_sample(dev, s, s->async->cur_chan, &val); diff --git a/drivers/staging/comedi/drivers/das6402.c b/drivers/staging/comedi/drivers/das6402.c index 04e224f8b779..96f4107b8054 100644 --- a/drivers/staging/comedi/drivers/das6402.c +++ b/drivers/staging/comedi/drivers/das6402.c @@ -186,7 +186,7 @@ static irqreturn_t das6402_interrupt(int irq, void *d) if (status & DAS6402_STATUS_FFULL) { async->events |= COMEDI_CB_OVERFLOW; } else if (status & DAS6402_STATUS_FFNE) { - unsigned int val; + unsigned short val; val = das6402_ai_read_sample(dev, s); comedi_buf_write_samples(s, &val, 1); diff --git a/drivers/staging/comedi/drivers/das800.c b/drivers/staging/comedi/drivers/das800.c index 4ea100ff6930..2881808d6606 100644 --- a/drivers/staging/comedi/drivers/das800.c +++ b/drivers/staging/comedi/drivers/das800.c @@ -427,7 +427,7 @@ static irqreturn_t das800_interrupt(int irq, void *d) struct comedi_cmd *cmd; unsigned long irq_flags; unsigned int status; - unsigned int val; + unsigned short val; bool fifo_empty; bool fifo_overflow; int i; diff --git a/drivers/staging/comedi/drivers/dmm32at.c b/drivers/staging/comedi/drivers/dmm32at.c index 17e6018918bb..56682f01242f 100644 --- a/drivers/staging/comedi/drivers/dmm32at.c +++ b/drivers/staging/comedi/drivers/dmm32at.c @@ -404,7 +404,7 @@ static irqreturn_t dmm32at_isr(int irq, void *d) { struct comedi_device *dev = d; unsigned char intstat; - unsigned int val; + unsigned short val; int i; if (!dev->attached) { diff --git a/drivers/staging/comedi/drivers/me4000.c b/drivers/staging/comedi/drivers/me4000.c index 726e40dc17b6..0d3d4cafce2e 100644 --- a/drivers/staging/comedi/drivers/me4000.c +++ b/drivers/staging/comedi/drivers/me4000.c @@ -924,7 +924,7 @@ static irqreturn_t me4000_ai_isr(int irq, void *dev_id) struct comedi_subdevice *s = dev->read_subdev; int i; int c = 0; - unsigned int lval; + unsigned short lval; if (!dev->attached) return IRQ_NONE; diff --git a/drivers/staging/comedi/drivers/pcl711.c b/drivers/staging/comedi/drivers/pcl711.c index 2dbf69e30965..bd6f42fe9e3c 100644 --- a/drivers/staging/comedi/drivers/pcl711.c +++ b/drivers/staging/comedi/drivers/pcl711.c @@ -184,7 +184,7 @@ static irqreturn_t pcl711_interrupt(int irq, void *d) struct comedi_device *dev = d; struct comedi_subdevice *s = dev->read_subdev; struct comedi_cmd *cmd = &s->async->cmd; - unsigned int data; + unsigned short data; if (!dev->attached) { dev_err(dev->class_dev, "spurious interrupt\n"); diff --git a/drivers/staging/comedi/drivers/pcl818.c b/drivers/staging/comedi/drivers/pcl818.c index 63e3011158f2..f4b4a686c710 100644 --- a/drivers/staging/comedi/drivers/pcl818.c +++ b/drivers/staging/comedi/drivers/pcl818.c @@ -423,7 +423,7 @@ static int pcl818_ai_eoc(struct comedi_device *dev, static bool pcl818_ai_write_sample(struct comedi_device *dev, struct comedi_subdevice *s, - unsigned int chan, unsigned int val) + unsigned int chan, unsigned short val) { struct pcl818_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; diff --git a/drivers/staging/fwserial/fwserial.c b/drivers/staging/fwserial/fwserial.c index db83d34cd677..c368082aae1a 100644 --- a/drivers/staging/fwserial/fwserial.c +++ b/drivers/staging/fwserial/fwserial.c @@ -2189,6 +2189,7 @@ static int fwserial_create(struct fw_unit *unit) err = fw_core_add_address_handler(&port->rx_handler, &fw_high_memory_region); if (err) { + tty_port_destroy(&port->port); kfree(port); goto free_ports; } @@ -2271,6 +2272,7 @@ static int fwserial_create(struct fw_unit *unit) free_ports: for (--i; i >= 0; --i) { + fw_core_remove_address_handler(&serial->ports[i]->rx_handler); tty_port_destroy(&serial->ports[i]->port); kfree(serial->ports[i]); } diff --git a/drivers/staging/gdm724x/gdm_usb.c b/drivers/staging/gdm724x/gdm_usb.c index dc4da66c3695..54bdb64f52e8 100644 --- a/drivers/staging/gdm724x/gdm_usb.c +++ b/drivers/staging/gdm724x/gdm_usb.c @@ -56,20 +56,24 @@ static int gdm_usb_recv(void *priv_dev, static int request_mac_address(struct lte_udev *udev) { - u8 buf[16] = {0,}; - struct hci_packet *hci = (struct hci_packet *)buf; + struct hci_packet *hci; struct usb_device *usbdev = udev->usbdev; int actual; int ret = -1; + hci = kmalloc(struct_size(hci, data, 1), GFP_KERNEL); + if (!hci) + return -ENOMEM; + hci->cmd_evt = gdm_cpu_to_dev16(udev->gdm_ed, LTE_GET_INFORMATION); hci->len = gdm_cpu_to_dev16(udev->gdm_ed, 1); hci->data[0] = MAC_ADDRESS; - ret = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 2), buf, 5, + ret = usb_bulk_msg(usbdev, usb_sndbulkpipe(usbdev, 2), hci, 5, &actual, 1000); udev->request_mac_addr = 1; + kfree(hci); return ret; } diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c index dc09cc6e1c47..09e7b4cd0138 100644 --- a/drivers/staging/ks7010/ks_wlan_net.c +++ b/drivers/staging/ks7010/ks_wlan_net.c @@ -1120,6 +1120,7 @@ static int ks_wlan_set_scan(struct net_device *dev, { struct ks_wlan_private *priv = netdev_priv(dev); struct iw_scan_req *req = NULL; + int len; if (priv->sleep_mode == SLP_SLEEP) return -EPERM; @@ -1129,8 +1130,9 @@ static int ks_wlan_set_scan(struct net_device *dev, if (wrqu->data.length == sizeof(struct iw_scan_req) && wrqu->data.flags & IW_SCAN_THIS_ESSID) { req = (struct iw_scan_req *)extra; - priv->scan_ssid_len = req->essid_len; - memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len); + len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + priv->scan_ssid_len = len; + memcpy(priv->scan_ssid, req->essid, len); } else { priv->scan_ssid_len = 0; } diff --git a/drivers/staging/media/allegro-dvt/allegro-core.c b/drivers/staging/media/allegro-dvt/allegro-core.c index 9f718f43282b..640451134072 100644 --- a/drivers/staging/media/allegro-dvt/allegro-core.c +++ b/drivers/staging/media/allegro-dvt/allegro-core.c @@ -2483,8 +2483,6 @@ static int allegro_open(struct file *file) INIT_LIST_HEAD(&channel->buffers_reference); INIT_LIST_HEAD(&channel->buffers_intermediate); - list_add(&channel->list, &dev->channels); - channel->fh.m2m_ctx = v4l2_m2m_ctx_init(dev->m2m_dev, channel, allegro_queue_init); @@ -2493,6 +2491,7 @@ static int allegro_open(struct file *file) goto error; } + list_add(&channel->list, &dev->channels); file->private_data = &channel->fh; v4l2_fh_add(&channel->fh); diff --git a/drivers/staging/media/atomisp/pci/atomisp_subdev.c b/drivers/staging/media/atomisp/pci/atomisp_subdev.c index b666cb23e5ca..2ef5f44e4b6b 100644 --- a/drivers/staging/media/atomisp/pci/atomisp_subdev.c +++ b/drivers/staging/media/atomisp/pci/atomisp_subdev.c @@ -349,12 +349,20 @@ static int isp_subdev_get_selection(struct v4l2_subdev *sd, return 0; } -static char *atomisp_pad_str[] = { "ATOMISP_SUBDEV_PAD_SINK", - "ATOMISP_SUBDEV_PAD_SOURCE_CAPTURE", - "ATOMISP_SUBDEV_PAD_SOURCE_VF", - "ATOMISP_SUBDEV_PAD_SOURCE_PREVIEW", - "ATOMISP_SUBDEV_PAD_SOURCE_VIDEO" - }; +static const char *atomisp_pad_str(unsigned int pad) +{ + static const char *const pad_str[] = { + "ATOMISP_SUBDEV_PAD_SINK", + "ATOMISP_SUBDEV_PAD_SOURCE_CAPTURE", + "ATOMISP_SUBDEV_PAD_SOURCE_VF", + "ATOMISP_SUBDEV_PAD_SOURCE_PREVIEW", + "ATOMISP_SUBDEV_PAD_SOURCE_VIDEO", + }; + + if (pad >= ARRAY_SIZE(pad_str)) + return "ATOMISP_INVALID_PAD"; + return pad_str[pad]; +} int atomisp_subdev_set_selection(struct v4l2_subdev *sd, struct v4l2_subdev_pad_config *cfg, @@ -378,7 +386,7 @@ int atomisp_subdev_set_selection(struct v4l2_subdev *sd, dev_dbg(isp->dev, "sel: pad %s tgt %s l %d t %d w %d h %d which %s f 0x%8.8x\n", - atomisp_pad_str[pad], target == V4L2_SEL_TGT_CROP + atomisp_pad_str(pad), target == V4L2_SEL_TGT_CROP ? "V4L2_SEL_TGT_CROP" : "V4L2_SEL_TGT_COMPOSE", r->left, r->top, r->width, r->height, which == V4L2_SUBDEV_FORMAT_TRY ? "V4L2_SUBDEV_FORMAT_TRY" @@ -612,7 +620,7 @@ void atomisp_subdev_set_ffmt(struct v4l2_subdev *sd, enum atomisp_input_stream_id stream_id; dev_dbg(isp->dev, "ffmt: pad %s w %d h %d code 0x%8.8x which %s\n", - atomisp_pad_str[pad], ffmt->width, ffmt->height, ffmt->code, + atomisp_pad_str(pad), ffmt->width, ffmt->height, ffmt->code, which == V4L2_SUBDEV_FORMAT_TRY ? "V4L2_SUBDEV_FORMAT_TRY" : "V4L2_SUBDEV_FORMAT_ACTIVE"); diff --git a/drivers/staging/media/atomisp/pci/hmm/hmm.c b/drivers/staging/media/atomisp/pci/hmm/hmm.c index e0eaff0f8a22..6a5ee4607089 100644 --- a/drivers/staging/media/atomisp/pci/hmm/hmm.c +++ b/drivers/staging/media/atomisp/pci/hmm/hmm.c @@ -269,7 +269,7 @@ ia_css_ptr hmm_alloc(size_t bytes, enum hmm_bo_type type, hmm_set(bo->start, 0, bytes); dev_dbg(atomisp_dev, - "%s: pages: 0x%08x (%ld bytes), type: %d from highmem %d, user ptr %p, cached %d\n", + "%s: pages: 0x%08x (%zu bytes), type: %d from highmem %d, user ptr %p, cached %d\n", __func__, bo->start, bytes, type, from_highmem, userptr, cached); return bo->start; diff --git a/drivers/staging/media/imx/imx-media-csc-scaler.c b/drivers/staging/media/imx/imx-media-csc-scaler.c index fab1155a5958..63a0204502a8 100644 --- a/drivers/staging/media/imx/imx-media-csc-scaler.c +++ b/drivers/staging/media/imx/imx-media-csc-scaler.c @@ -869,11 +869,7 @@ void imx_media_csc_scaler_device_unregister(struct imx_media_video_dev *vdev) struct ipu_csc_scaler_priv *priv = vdev_to_priv(vdev); struct video_device *vfd = priv->vdev.vfd; - mutex_lock(&priv->mutex); - video_unregister_device(vfd); - - mutex_unlock(&priv->mutex); } struct imx_media_video_dev * diff --git a/drivers/staging/media/imx/imx-media-dev.c b/drivers/staging/media/imx/imx-media-dev.c index 6d2205461e56..338b8bd0bb07 100644 --- a/drivers/staging/media/imx/imx-media-dev.c +++ b/drivers/staging/media/imx/imx-media-dev.c @@ -53,6 +53,7 @@ static int imx6_media_probe_complete(struct v4l2_async_notifier *notifier) imxmd->m2m_vdev = imx_media_csc_scaler_device_init(imxmd); if (IS_ERR(imxmd->m2m_vdev)) { ret = PTR_ERR(imxmd->m2m_vdev); + imxmd->m2m_vdev = NULL; goto unlock; } @@ -107,10 +108,14 @@ static int imx_media_remove(struct platform_device *pdev) v4l2_info(&imxmd->v4l2_dev, "Removing imx-media\n"); + if (imxmd->m2m_vdev) { + imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev); + imxmd->m2m_vdev = NULL; + } + v4l2_async_notifier_unregister(&imxmd->notifier); imx_media_unregister_ipu_internal_subdevs(imxmd); v4l2_async_notifier_cleanup(&imxmd->notifier); - imx_media_csc_scaler_device_unregister(imxmd->m2m_vdev); media_device_unregister(&imxmd->md); v4l2_device_unregister(&imxmd->v4l2_dev); media_device_cleanup(&imxmd->md); diff --git a/drivers/staging/media/imx/imx7-media-csi.c b/drivers/staging/media/imx/imx7-media-csi.c index a3f3df901704..ac52b1daf991 100644 --- a/drivers/staging/media/imx/imx7-media-csi.c +++ b/drivers/staging/media/imx/imx7-media-csi.c @@ -499,6 +499,7 @@ static int imx7_csi_pad_link_validate(struct v4l2_subdev *sd, struct v4l2_subdev_format *sink_fmt) { struct imx7_csi *csi = v4l2_get_subdevdata(sd); + struct media_entity *src; struct media_pad *pad; int ret; @@ -509,11 +510,21 @@ static int imx7_csi_pad_link_validate(struct v4l2_subdev *sd, if (!csi->src_sd) return -EPIPE; + src = &csi->src_sd->entity; + + /* + * if the source is neither a CSI MUX or CSI-2 get the one directly + * upstream from this CSI + */ + if (src->function != MEDIA_ENT_F_VID_IF_BRIDGE && + src->function != MEDIA_ENT_F_VID_MUX) + src = &csi->sd.entity; + /* - * find the entity that is selected by the CSI mux. This is needed + * find the entity that is selected by the source. This is needed * to distinguish between a parallel or CSI-2 pipeline. */ - pad = imx_media_pipeline_pad(&csi->src_sd->entity, 0, 0, true); + pad = imx_media_pipeline_pad(src, 0, 0, true); if (!pad) return -ENODEV; @@ -1164,12 +1175,12 @@ static int imx7_csi_notify_bound(struct v4l2_async_notifier *notifier, struct imx7_csi *csi = imx7_csi_notifier_to_dev(notifier); struct media_pad *sink = &csi->sd.entity.pads[IMX7_CSI_PAD_SINK]; - /* The bound subdev must always be the CSI mux */ - if (WARN_ON(sd->entity.function != MEDIA_ENT_F_VID_MUX)) - return -ENXIO; - - /* Mark it as such via its group id */ - sd->grp_id = IMX_MEDIA_GRP_ID_CSI_MUX; + /* + * If the subdev is a video mux, it must be one of the CSI + * muxes. Mark it as such via its group id. + */ + if (sd->entity.function == MEDIA_ENT_F_VID_MUX) + sd->grp_id = IMX_MEDIA_GRP_ID_CSI_MUX; return v4l2_create_fwnode_links_to_pad(sd, sink); } diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.c b/drivers/staging/media/sunxi/cedrus/cedrus.c index ddad5d274ee8..7bd9291c8d5f 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus.c @@ -34,56 +34,48 @@ static const struct cedrus_control cedrus_controls[] = { .id = V4L2_CID_MPEG_VIDEO_MPEG2_SLICE_PARAMS, }, .codec = CEDRUS_CODEC_MPEG2, - .required = true, }, { .cfg = { .id = V4L2_CID_MPEG_VIDEO_MPEG2_QUANTIZATION, }, .codec = CEDRUS_CODEC_MPEG2, - .required = false, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_DECODE_PARAMS, }, .codec = CEDRUS_CODEC_H264, - .required = true, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_SLICE_PARAMS, }, .codec = CEDRUS_CODEC_H264, - .required = true, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_SPS, }, .codec = CEDRUS_CODEC_H264, - .required = true, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_PPS, }, .codec = CEDRUS_CODEC_H264, - .required = true, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_SCALING_MATRIX, }, .codec = CEDRUS_CODEC_H264, - .required = false, }, { .cfg = { .id = V4L2_CID_STATELESS_H264_PRED_WEIGHTS, }, .codec = CEDRUS_CODEC_H264, - .required = false, }, { .cfg = { @@ -92,7 +84,6 @@ static const struct cedrus_control cedrus_controls[] = { .def = V4L2_STATELESS_H264_DECODE_MODE_SLICE_BASED, }, .codec = CEDRUS_CODEC_H264, - .required = false, }, { .cfg = { @@ -101,7 +92,6 @@ static const struct cedrus_control cedrus_controls[] = { .def = V4L2_STATELESS_H264_START_CODE_NONE, }, .codec = CEDRUS_CODEC_H264, - .required = false, }, /* * We only expose supported profiles information, @@ -120,28 +110,24 @@ static const struct cedrus_control cedrus_controls[] = { BIT(V4L2_MPEG_VIDEO_H264_PROFILE_EXTENDED), }, .codec = CEDRUS_CODEC_H264, - .required = false, }, { .cfg = { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS, }, .codec = CEDRUS_CODEC_H265, - .required = true, }, { .cfg = { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS, }, .codec = CEDRUS_CODEC_H265, - .required = true, }, { .cfg = { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, }, .codec = CEDRUS_CODEC_H265, - .required = true, }, { .cfg = { @@ -150,7 +136,6 @@ static const struct cedrus_control cedrus_controls[] = { .def = V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED, }, .codec = CEDRUS_CODEC_H265, - .required = false, }, { .cfg = { @@ -159,14 +144,12 @@ static const struct cedrus_control cedrus_controls[] = { .def = V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE, }, .codec = CEDRUS_CODEC_H265, - .required = false, }, { .cfg = { .id = V4L2_CID_MPEG_VIDEO_VP8_FRAME_HEADER, }, .codec = CEDRUS_CODEC_VP8, - .required = true, }, }; @@ -227,12 +210,8 @@ static int cedrus_init_ctrls(struct cedrus_dev *dev, struct cedrus_ctx *ctx) static int cedrus_request_validate(struct media_request *req) { struct media_request_object *obj; - struct v4l2_ctrl_handler *parent_hdl, *hdl; struct cedrus_ctx *ctx = NULL; - struct v4l2_ctrl *ctrl_test; unsigned int count; - unsigned int i; - int ret = 0; list_for_each_entry(obj, &req->objects, list) { struct vb2_buffer *vb; @@ -259,34 +238,6 @@ static int cedrus_request_validate(struct media_request *req) return -EINVAL; } - parent_hdl = &ctx->hdl; - - hdl = v4l2_ctrl_request_hdl_find(req, parent_hdl); - if (!hdl) { - v4l2_info(&ctx->dev->v4l2_dev, "Missing codec control(s)\n"); - return -ENOENT; - } - - for (i = 0; i < CEDRUS_CONTROLS_COUNT; i++) { - if (cedrus_controls[i].codec != ctx->current_codec || - !cedrus_controls[i].required) - continue; - - ctrl_test = v4l2_ctrl_request_hdl_ctrl_find(hdl, - cedrus_controls[i].cfg.id); - if (!ctrl_test) { - v4l2_info(&ctx->dev->v4l2_dev, - "Missing required codec control\n"); - ret = -ENOENT; - break; - } - } - - v4l2_ctrl_request_hdl_put(hdl); - - if (ret) - return ret; - return vb2_request_validate(req); } diff --git a/drivers/staging/media/sunxi/cedrus/cedrus.h b/drivers/staging/media/sunxi/cedrus/cedrus.h index c96077aaef49..251a6a660351 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus.h +++ b/drivers/staging/media/sunxi/cedrus/cedrus.h @@ -56,7 +56,6 @@ enum cedrus_h264_pic_type { struct cedrus_control { struct v4l2_ctrl_config cfg; enum cedrus_codec codec; - unsigned char required:1; }; struct cedrus_h264_run { diff --git a/drivers/staging/most/sound/sound.c b/drivers/staging/most/sound/sound.c index 3a1a59058042..45befb8c1126 100644 --- a/drivers/staging/most/sound/sound.c +++ b/drivers/staging/most/sound/sound.c @@ -86,6 +86,8 @@ static void swap_copy24(u8 *dest, const u8 *source, unsigned int bytes) { unsigned int i = 0; + if (bytes < 2) + return; while (i < bytes - 2) { dest[i] = source[i + 2]; dest[i + 1] = source[i + 1]; diff --git a/drivers/staging/mt7621-dma/Makefile b/drivers/staging/mt7621-dma/Makefile index 66da1bf10c32..23256d1286f3 100644 --- a/drivers/staging/mt7621-dma/Makefile +++ b/drivers/staging/mt7621-dma/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_MTK_HSDMA) += mtk-hsdma.o +obj-$(CONFIG_MTK_HSDMA) += hsdma-mt7621.o ccflags-y += -I$(srctree)/drivers/dma diff --git a/drivers/staging/mt7621-dma/mtk-hsdma.c b/drivers/staging/mt7621-dma/hsdma-mt7621.c similarity index 99% rename from drivers/staging/mt7621-dma/mtk-hsdma.c rename to drivers/staging/mt7621-dma/hsdma-mt7621.c index bc4bb4374313..b0ed935de7ac 100644 --- a/drivers/staging/mt7621-dma/mtk-hsdma.c +++ b/drivers/staging/mt7621-dma/hsdma-mt7621.c @@ -749,7 +749,7 @@ static struct platform_driver mtk_hsdma_driver = { .probe = mtk_hsdma_probe, .remove = mtk_hsdma_remove, .driver = { - .name = "hsdma-mt7621", + .name = KBUILD_MODNAME, .of_match_table = mtk_hsdma_of_match, }, }; diff --git a/drivers/staging/rtl8188eu/core/rtw_ap.c b/drivers/staging/rtl8188eu/core/rtw_ap.c index fa1e34a0d456..182bb944c9b3 100644 --- a/drivers/staging/rtl8188eu/core/rtw_ap.c +++ b/drivers/staging/rtl8188eu/core/rtw_ap.c @@ -791,6 +791,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SSID, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p && ie_len > 0) { + ie_len = min_t(int, ie_len, sizeof(pbss_network->ssid.ssid)); memset(&pbss_network->ssid, 0, sizeof(struct ndis_802_11_ssid)); memcpy(pbss_network->ssid.ssid, p + 2, ie_len); pbss_network->ssid.ssid_length = ie_len; @@ -811,6 +812,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SUPP_RATES, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p) { + ie_len = min_t(int, ie_len, NDIS_802_11_LENGTH_RATES_EX); memcpy(supportRate, p + 2, ie_len); supportRateNum = ie_len; } @@ -819,6 +821,8 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_EXT_SUPP_RATES, &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_); if (p) { + ie_len = min_t(int, ie_len, + NDIS_802_11_LENGTH_RATES_EX - supportRateNum); memcpy(supportRate + supportRateNum, p + 2, ie_len); supportRateNum += ie_len; } @@ -934,6 +938,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf, int len) pht_cap->mcs.rx_mask[0] = 0xff; pht_cap->mcs.rx_mask[1] = 0x0; + ie_len = min_t(int, ie_len, sizeof(pmlmepriv->htpriv.ht_cap)); memcpy(&pmlmepriv->htpriv.ht_cap, p + 2, ie_len); } diff --git a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c index 6f42f13a71fa..f92fcb623a2c 100644 --- a/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c +++ b/drivers/staging/rtl8188eu/os_dep/ioctl_linux.c @@ -1133,9 +1133,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a, break; } sec_len = *(pos++); len -= 1; - if (sec_len > 0 && sec_len <= len) { + if (sec_len > 0 && + sec_len <= len && + sec_len <= 32) { ssid[ssid_index].ssid_length = sec_len; - memcpy(ssid[ssid_index].ssid, pos, ssid[ssid_index].ssid_length); + memcpy(ssid[ssid_index].ssid, pos, sec_len); ssid_index++; } pos += sec_len; diff --git a/drivers/staging/rtl8188eu/os_dep/usb_intf.c b/drivers/staging/rtl8188eu/os_dep/usb_intf.c index 43ebd11b53fe..efad43d8e465 100644 --- a/drivers/staging/rtl8188eu/os_dep/usb_intf.c +++ b/drivers/staging/rtl8188eu/os_dep/usb_intf.c @@ -41,6 +41,7 @@ static const struct usb_device_id rtw_usb_id_tbl[] = { {USB_DEVICE(0x2357, 0x0111)}, /* TP-Link TL-WN727N v5.21 */ {USB_DEVICE(0x2C4E, 0x0102)}, /* MERCUSYS MW150US v2 */ {USB_DEVICE(0x0df6, 0x0076)}, /* Sitecom N150 v2 */ + {USB_DEVICE(0x7392, 0xb811)}, /* Edimax EW-7811UN V2 */ {USB_DEVICE(USB_VENDER_ID_REALTEK, 0xffef)}, /* Rosewill RNX-N150NUB */ {} /* Terminating entry */ }; diff --git a/drivers/staging/rtl8192e/Kconfig b/drivers/staging/rtl8192e/Kconfig index 03fcc23516fd..6e7d84ac06f5 100644 --- a/drivers/staging/rtl8192e/Kconfig +++ b/drivers/staging/rtl8192e/Kconfig @@ -26,6 +26,7 @@ config RTLLIB_CRYPTO_CCMP config RTLLIB_CRYPTO_TKIP tristate "Support for rtllib TKIP crypto" depends on RTLLIB + select CRYPTO select CRYPTO_LIB_ARC4 select CRYPTO_MICHAEL_MIC default y diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c index 16bcee13f64b..407effde5e71 100644 --- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c +++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c @@ -406,9 +406,10 @@ static int _rtl92e_wx_set_scan(struct net_device *dev, struct iw_scan_req *req = (struct iw_scan_req *)b; if (req->essid_len) { - ieee->current_network.ssid_len = req->essid_len; - memcpy(ieee->current_network.ssid, req->essid, - req->essid_len); + int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + + ieee->current_network.ssid_len = len; + memcpy(ieee->current_network.ssid, req->essid, len); } } diff --git a/drivers/staging/rtl8192u/r8192U_wx.c b/drivers/staging/rtl8192u/r8192U_wx.c index d853586705fc..77bf88696a84 100644 --- a/drivers/staging/rtl8192u/r8192U_wx.c +++ b/drivers/staging/rtl8192u/r8192U_wx.c @@ -331,8 +331,10 @@ static int r8192_wx_set_scan(struct net_device *dev, struct iw_request_info *a, struct iw_scan_req *req = (struct iw_scan_req *)b; if (req->essid_len) { - ieee->current_network.ssid_len = req->essid_len; - memcpy(ieee->current_network.ssid, req->essid, req->essid_len); + int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE); + + ieee->current_network.ssid_len = len; + memcpy(ieee->current_network.ssid, req->essid, len); } } diff --git a/drivers/staging/rtl8712/rtl871x_cmd.c b/drivers/staging/rtl8712/rtl871x_cmd.c index 18116469bd31..75716f59044d 100644 --- a/drivers/staging/rtl8712/rtl871x_cmd.c +++ b/drivers/staging/rtl8712/rtl871x_cmd.c @@ -192,8 +192,10 @@ u8 r8712_sitesurvey_cmd(struct _adapter *padapter, psurveyPara->ss_ssidlen = 0; memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1); if (pssid && pssid->SsidLength) { - memcpy(psurveyPara->ss_ssid, pssid->Ssid, pssid->SsidLength); - psurveyPara->ss_ssidlen = cpu_to_le32(pssid->SsidLength); + int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE); + + memcpy(psurveyPara->ss_ssid, pssid->Ssid, len); + psurveyPara->ss_ssidlen = cpu_to_le32(len); } set_fwstate(pmlmepriv, _FW_UNDER_SURVEY); r8712_enqueue_cmd(pcmdpriv, ph2c); diff --git a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c index cbaa7a489748..2a661b04cd25 100644 --- a/drivers/staging/rtl8712/rtl871x_ioctl_linux.c +++ b/drivers/staging/rtl8712/rtl871x_ioctl_linux.c @@ -924,7 +924,7 @@ static int r871x_wx_set_priv(struct net_device *dev, struct iw_point *dwrq = (struct iw_point *)awrq; len = dwrq->length; - ext = memdup_user(dwrq->pointer, len); + ext = strndup_user(dwrq->pointer, len); if (IS_ERR(ext)) return PTR_ERR(ext); diff --git a/drivers/staging/rtl8723bs/os_dep/wifi_regd.c b/drivers/staging/rtl8723bs/os_dep/wifi_regd.c index 2833fc6901e6..3f04b7a954ba 100644 --- a/drivers/staging/rtl8723bs/os_dep/wifi_regd.c +++ b/drivers/staging/rtl8723bs/os_dep/wifi_regd.c @@ -34,7 +34,7 @@ NL80211_RRF_PASSIVE_SCAN) static const struct ieee80211_regdomain rtw_regdom_rd = { - .n_reg_rules = 3, + .n_reg_rules = 2, .alpha2 = "99", .reg_rules = { RTW_2GHZ_CH01_11, diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-ctl.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-ctl.c index 4c2cae99776b..3703409715da 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-ctl.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-ctl.c @@ -224,7 +224,7 @@ int snd_bcm2835_new_ctl(struct bcm2835_chip *chip) { int err; - strcpy(chip->card->mixername, "Broadcom Mixer"); + strscpy(chip->card->mixername, "Broadcom Mixer", sizeof(chip->card->mixername)); err = create_ctls(chip, ARRAY_SIZE(snd_bcm2835_ctl), snd_bcm2835_ctl); if (err < 0) return err; @@ -261,7 +261,7 @@ static const struct snd_kcontrol_new snd_bcm2835_headphones_ctl[] = { int snd_bcm2835_new_headphones_ctl(struct bcm2835_chip *chip) { - strcpy(chip->card->mixername, "Broadcom Mixer"); + strscpy(chip->card->mixername, "Broadcom Mixer", sizeof(chip->card->mixername)); return create_ctls(chip, ARRAY_SIZE(snd_bcm2835_headphones_ctl), snd_bcm2835_headphones_ctl); } @@ -295,7 +295,7 @@ static const struct snd_kcontrol_new snd_bcm2835_hdmi[] = { int snd_bcm2835_new_hdmi_ctl(struct bcm2835_chip *chip) { - strcpy(chip->card->mixername, "Broadcom Mixer"); + strscpy(chip->card->mixername, "Broadcom Mixer", sizeof(chip->card->mixername)); return create_ctls(chip, ARRAY_SIZE(snd_bcm2835_hdmi), snd_bcm2835_hdmi); } diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c index f783b632141b..096f2c54258a 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835-pcm.c @@ -334,7 +334,7 @@ int snd_bcm2835_new_pcm(struct bcm2835_chip *chip, const char *name, pcm->private_data = chip; pcm->nonatomic = true; - strcpy(pcm->name, name); + strscpy(pcm->name, name, sizeof(pcm->name)); if (!spdif) { chip->dest = route; chip->volume = 0; diff --git a/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c b/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c index cf5f80f5ca6b..c250fbef2fa3 100644 --- a/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c +++ b/drivers/staging/vc04_services/bcm2835-audio/bcm2835.c @@ -185,9 +185,9 @@ static int snd_add_child_device(struct device *dev, goto error; } - strcpy(card->driver, audio_driver->driver.name); - strcpy(card->shortname, audio_driver->shortname); - strcpy(card->longname, audio_driver->longname); + strscpy(card->driver, audio_driver->driver.name, sizeof(card->driver)); + strscpy(card->shortname, audio_driver->shortname, sizeof(card->shortname)); + strscpy(card->longname, audio_driver->longname, sizeof(card->longname)); err = audio_driver->newpcm(chip, audio_driver->shortname, audio_driver->route, diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index f500a7043805..2ca5805b2fce 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -958,7 +958,7 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance, struct vchiq_service *service; struct bulk_waiter_node *waiter = NULL; bool found = false; - void *userdata = NULL; + void *userdata; int status = 0; int ret; @@ -997,6 +997,8 @@ static int vchiq_irq_queue_bulk_tx_rx(struct vchiq_instance *instance, "found bulk_waiter %pK for pid %d", waiter, current->pid); userdata = &waiter->bulk_waiter; + } else { + userdata = args->userdata; } /* @@ -1715,7 +1717,7 @@ vchiq_compat_ioctl_queue_bulk(struct file *file, { struct vchiq_queue_bulk_transfer32 args32; struct vchiq_queue_bulk_transfer args; - enum vchiq_bulk_dir dir = (cmd == VCHIQ_IOC_QUEUE_BULK_TRANSMIT) ? + enum vchiq_bulk_dir dir = (cmd == VCHIQ_IOC_QUEUE_BULK_TRANSMIT32) ? VCHIQ_BULK_TRANSMIT : VCHIQ_BULK_RECEIVE; if (copy_from_user(&args32, argp, sizeof(args32))) diff --git a/drivers/staging/wfx/data_tx.c b/drivers/staging/wfx/data_tx.c index 36b36ef39d05..77fb104efdec 100644 --- a/drivers/staging/wfx/data_tx.c +++ b/drivers/staging/wfx/data_tx.c @@ -331,6 +331,7 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, { struct hif_msg *hif_msg; struct hif_req_tx *req; + struct wfx_tx_priv *tx_priv; struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ieee80211_key_conf *hw_key = tx_info->control.hw_key; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; @@ -344,11 +345,14 @@ static int wfx_tx_inner(struct wfx_vif *wvif, struct ieee80211_sta *sta, // From now tx_info->control is unusable memset(tx_info->rate_driver_data, 0, sizeof(struct wfx_tx_priv)); + // Fill tx_priv + tx_priv = (struct wfx_tx_priv *)tx_info->rate_driver_data; + tx_priv->icv_size = wfx_tx_get_icv_len(hw_key); // Fill hif_msg WARN(skb_headroom(skb) < wmsg_len, "not enough space in skb"); WARN(offset & 1, "attempt to transmit an unaligned frame"); - skb_put(skb, wfx_tx_get_icv_len(hw_key)); + skb_put(skb, tx_priv->icv_size); skb_push(skb, wmsg_len); memset(skb->data, 0, wmsg_len); hif_msg = (struct hif_msg *)skb->data; @@ -484,6 +488,7 @@ static void wfx_tx_fill_rates(struct wfx_dev *wdev, void wfx_tx_confirm_cb(struct wfx_dev *wdev, const struct hif_cnf_tx *arg) { + const struct wfx_tx_priv *tx_priv; struct ieee80211_tx_info *tx_info; struct wfx_vif *wvif; struct sk_buff *skb; @@ -495,6 +500,7 @@ void wfx_tx_confirm_cb(struct wfx_dev *wdev, const struct hif_cnf_tx *arg) return; } tx_info = IEEE80211_SKB_CB(skb); + tx_priv = wfx_skb_tx_priv(skb); wvif = wdev_to_wvif(wdev, ((struct hif_msg *)skb->data)->interface); WARN_ON(!wvif); if (!wvif) @@ -503,6 +509,8 @@ void wfx_tx_confirm_cb(struct wfx_dev *wdev, const struct hif_cnf_tx *arg) // Note that wfx_pending_get_pkt_us_delay() get data from tx_info _trace_tx_stats(arg, skb, wfx_pending_get_pkt_us_delay(wdev, skb)); wfx_tx_fill_rates(wdev, tx_info, arg); + skb_trim(skb, skb->len - tx_priv->icv_size); + // From now, you can touch to tx_info->status, but do not touch to // tx_priv anymore // FIXME: use ieee80211_tx_info_clear_status() diff --git a/drivers/staging/wfx/data_tx.h b/drivers/staging/wfx/data_tx.h index 46c9fff7a870..401363d6b563 100644 --- a/drivers/staging/wfx/data_tx.h +++ b/drivers/staging/wfx/data_tx.h @@ -35,6 +35,7 @@ struct tx_policy_cache { struct wfx_tx_priv { ktime_t xmit_timestamp; + unsigned char icv_size; }; void wfx_tx_policy_init(struct wfx_vif *wvif); diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 9b3eb2e8c92a..b926e1d6c7b8 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -86,8 +86,7 @@ static int cxgbit_is_ofld_imm(const struct sk_buff *skb) if (likely(cxgbit_skcb_flags(skb) & SKCBF_TX_ISO)) length += sizeof(struct cpl_tx_data_iso); -#define MAX_IMM_TX_PKT_LEN 256 - return length <= MAX_IMM_TX_PKT_LEN; + return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN; } /* diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index 14db5e568f22..d4cc43afe05b 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -3739,6 +3739,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd) spin_unlock(&dev->t10_pr.registration_lock); put_unaligned_be32(add_len, &buf[4]); + target_set_cmd_data_length(cmd, 8 + add_len); transport_kunmap_data_sg(cmd); @@ -3757,7 +3758,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) struct t10_pr_registration *pr_reg; unsigned char *buf; u64 pr_res_key; - u32 add_len = 16; /* Hardcoded to 16 when a reservation is held. */ + u32 add_len = 0; if (cmd->data_length < 8) { pr_err("PRIN SA READ_RESERVATIONS SCSI Data Length: %u" @@ -3775,8 +3776,9 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) pr_reg = dev->dev_pr_res_holder; if (pr_reg) { /* - * Set the hardcoded Additional Length + * Set the Additional Length to 16 when a reservation is held */ + add_len = 16; put_unaligned_be32(add_len, &buf[4]); if (cmd->data_length < 22) @@ -3812,6 +3814,8 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd) (pr_reg->pr_res_type & 0x0f); } + target_set_cmd_data_length(cmd, 8 + add_len); + err: spin_unlock(&dev->dev_reservation_lock); transport_kunmap_data_sg(cmd); @@ -3830,7 +3834,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) struct se_device *dev = cmd->se_dev; struct t10_reservation *pr_tmpl = &dev->t10_pr; unsigned char *buf; - u16 add_len = 8; /* Hardcoded to 8. */ + u16 len = 8; /* Hardcoded to 8. */ if (cmd->data_length < 6) { pr_err("PRIN SA REPORT_CAPABILITIES SCSI Data Length:" @@ -3842,7 +3846,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) if (!buf) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - put_unaligned_be16(add_len, &buf[0]); + put_unaligned_be16(len, &buf[0]); buf[2] |= 0x10; /* CRH: Compatible Reservation Hanlding bit. */ buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */ buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */ @@ -3871,6 +3875,8 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd) buf[4] |= 0x02; /* PR_TYPE_WRITE_EXCLUSIVE */ buf[5] |= 0x01; /* PR_TYPE_EXCLUSIVE_ACCESS_ALLREG */ + target_set_cmd_data_length(cmd, len); + transport_kunmap_data_sg(cmd); return 0; @@ -4031,6 +4037,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd) * Set ADDITIONAL_LENGTH */ put_unaligned_be32(add_len, &buf[4]); + target_set_cmd_data_length(cmd, 8 + add_len); transport_kunmap_data_sg(cmd); diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index fca4bd079d02..8a4d58fdc9fe 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -879,11 +879,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status) } EXPORT_SYMBOL(target_complete_cmd); -void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +void target_set_cmd_data_length(struct se_cmd *cmd, int length) { - if ((scsi_status == SAM_STAT_GOOD || - cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) && - length < cmd->data_length) { + if (length < cmd->data_length) { if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) { cmd->residual_count += cmd->data_length - length; } else { @@ -893,6 +891,15 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len cmd->data_length = length; } +} +EXPORT_SYMBOL(target_set_cmd_data_length); + +void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length) +{ + if (scsi_status == SAM_STAT_GOOD || + cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) { + target_set_cmd_data_length(cmd, length); + } target_complete_cmd(cmd, scsi_status); } diff --git a/drivers/tee/optee/rpc.c b/drivers/tee/optee/rpc.c index 1e3614e4798f..6cbb3643c6c4 100644 --- a/drivers/tee/optee/rpc.c +++ b/drivers/tee/optee/rpc.c @@ -54,8 +54,9 @@ static void handle_rpc_func_cmd_get_time(struct optee_msg_arg *arg) static void handle_rpc_func_cmd_i2c_transfer(struct tee_context *ctx, struct optee_msg_arg *arg) { - struct i2c_client client = { 0 }; struct tee_param *params; + struct i2c_adapter *adapter; + struct i2c_msg msg = { }; size_t i; int ret = -EOPNOTSUPP; u8 attr[] = { @@ -85,48 +86,48 @@ static void handle_rpc_func_cmd_i2c_transfer(struct tee_context *ctx, goto bad; } - client.adapter = i2c_get_adapter(params[0].u.value.b); - if (!client.adapter) + adapter = i2c_get_adapter(params[0].u.value.b); + if (!adapter) goto bad; if (params[1].u.value.a & OPTEE_MSG_RPC_CMD_I2C_FLAGS_TEN_BIT) { - if (!i2c_check_functionality(client.adapter, + if (!i2c_check_functionality(adapter, I2C_FUNC_10BIT_ADDR)) { - i2c_put_adapter(client.adapter); + i2c_put_adapter(adapter); goto bad; } - client.flags = I2C_CLIENT_TEN; + msg.flags = I2C_M_TEN; } - client.addr = params[0].u.value.c; - snprintf(client.name, I2C_NAME_SIZE, "i2c%d", client.adapter->nr); + msg.addr = params[0].u.value.c; + msg.buf = params[2].u.memref.shm->kaddr; + msg.len = params[2].u.memref.size; switch (params[0].u.value.a) { case OPTEE_MSG_RPC_CMD_I2C_TRANSFER_RD: - ret = i2c_master_recv(&client, params[2].u.memref.shm->kaddr, - params[2].u.memref.size); + msg.flags |= I2C_M_RD; break; case OPTEE_MSG_RPC_CMD_I2C_TRANSFER_WR: - ret = i2c_master_send(&client, params[2].u.memref.shm->kaddr, - params[2].u.memref.size); break; default: - i2c_put_adapter(client.adapter); + i2c_put_adapter(adapter); goto bad; } + ret = i2c_transfer(adapter, &msg, 1); + if (ret < 0) { arg->ret = TEEC_ERROR_COMMUNICATION; } else { - params[3].u.value.a = ret; + params[3].u.value.a = msg.len; if (optee_to_msg_param(arg->params, arg->num_params, params)) arg->ret = TEEC_ERROR_BAD_PARAMETERS; else arg->ret = TEEC_SUCCESS; } - i2c_put_adapter(client.adapter); + i2c_put_adapter(adapter); kfree(params); return; bad: diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_cooling.c index 612f063c1cfc..ddc166e3a93e 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -441,7 +441,7 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, frequency = get_state_freq(cpufreq_cdev, state); ret = freq_qos_update_request(&cpufreq_cdev->qos_req, frequency); - if (ret > 0) { + if (ret >= 0) { cpufreq_cdev->cpufreq_state = state; cpus = cpufreq_cdev->policy->cpus; max_capacity = arch_scale_cpu_capacity(cpumask_first(cpus)); diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index a8572f49d3ad..0fc2dae329e5 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -762,12 +762,6 @@ static int tb_init_port(struct tb_port *port) tb_dump_port(port->sw->tb, &port->config); - /* Control port does not need HopID allocation */ - if (port->port) { - ida_init(&port->in_hopids); - ida_init(&port->out_hopids); - } - INIT_LIST_HEAD(&port->list); return 0; @@ -1789,10 +1783,8 @@ static void tb_switch_release(struct device *dev) dma_port_free(sw->dma_port); tb_switch_for_each_port(sw, port) { - if (!port->disabled) { - ida_destroy(&port->in_hopids); - ida_destroy(&port->out_hopids); - } + ida_destroy(&port->in_hopids); + ida_destroy(&port->out_hopids); } kfree(sw->uuid); @@ -1972,6 +1964,12 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, struct device *parent, /* minimum setup for tb_find_cap and tb_drom_read to work */ sw->ports[i].sw = sw; sw->ports[i].port = i; + + /* Control port does not need HopID allocation */ + if (i) { + ida_init(&sw->ports[i].in_hopids); + ida_init(&sw->ports[i].out_hopids); + } } ret = tb_switch_find_vse_cap(sw, TB_VSE_CAP_PLUG_EVENTS); diff --git a/drivers/thunderbolt/tb.c b/drivers/thunderbolt/tb.c index 51d5b031cada..9932b1870e56 100644 --- a/drivers/thunderbolt/tb.c +++ b/drivers/thunderbolt/tb.c @@ -138,6 +138,10 @@ static void tb_discover_tunnels(struct tb_switch *sw) parent->boot = true; parent = tb_switch_parent(parent); } + } else if (tb_tunnel_is_dp(tunnel)) { + /* Keep the domain from powering down */ + pm_runtime_get_sync(&tunnel->src_port->sw->dev); + pm_runtime_get_sync(&tunnel->dst_port->sw->dev); } list_add_tail(&tunnel->list, &tcm->tunnel_list); diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index c676fa89ee0b..51dafc06f541 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -2559,7 +2559,8 @@ static void gsmld_write_wakeup(struct tty_struct *tty) */ static ssize_t gsmld_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) { return -EOPNOTSUPP; } diff --git a/drivers/tty/n_hdlc.c b/drivers/tty/n_hdlc.c index 12557ee1edb6..1363e659dc1d 100644 --- a/drivers/tty/n_hdlc.c +++ b/drivers/tty/n_hdlc.c @@ -416,13 +416,19 @@ static void n_hdlc_tty_receive(struct tty_struct *tty, const __u8 *data, * Returns the number of bytes returned or error code. */ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, - __u8 __user *buf, size_t nr) + __u8 *kbuf, size_t nr, + void **cookie, unsigned long offset) { struct n_hdlc *n_hdlc = tty->disc_data; int ret = 0; struct n_hdlc_buf *rbuf; DECLARE_WAITQUEUE(wait, current); + /* Is this a repeated call for an rbuf we already found earlier? */ + rbuf = *cookie; + if (rbuf) + goto have_rbuf; + add_wait_queue(&tty->read_wait, &wait); for (;;) { @@ -436,25 +442,8 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, set_current_state(TASK_INTERRUPTIBLE); rbuf = n_hdlc_buf_get(&n_hdlc->rx_buf_list); - if (rbuf) { - if (rbuf->count > nr) { - /* too large for caller's buffer */ - ret = -EOVERFLOW; - } else { - __set_current_state(TASK_RUNNING); - if (copy_to_user(buf, rbuf->buf, rbuf->count)) - ret = -EFAULT; - else - ret = rbuf->count; - } - - if (n_hdlc->rx_free_buf_list.count > - DEFAULT_RX_BUF_COUNT) - kfree(rbuf); - else - n_hdlc_buf_put(&n_hdlc->rx_free_buf_list, rbuf); + if (rbuf) break; - } /* no data */ if (tty_io_nonblock(tty, file)) { @@ -473,6 +462,39 @@ static ssize_t n_hdlc_tty_read(struct tty_struct *tty, struct file *file, remove_wait_queue(&tty->read_wait, &wait); __set_current_state(TASK_RUNNING); + if (!rbuf) + return ret; + *cookie = rbuf; + +have_rbuf: + /* Have we used it up entirely? */ + if (offset >= rbuf->count) + goto done_with_rbuf; + + /* More data to go, but can't copy any more? EOVERFLOW */ + ret = -EOVERFLOW; + if (!nr) + goto done_with_rbuf; + + /* Copy as much data as possible */ + ret = rbuf->count - offset; + if (ret > nr) + ret = nr; + memcpy(kbuf, rbuf->buf+offset, ret); + offset += ret; + + /* If we still have data left, we leave the rbuf in the cookie */ + if (offset < rbuf->count) + return ret; + +done_with_rbuf: + *cookie = NULL; + + if (n_hdlc->rx_free_buf_list.count > DEFAULT_RX_BUF_COUNT) + kfree(rbuf); + else + n_hdlc_buf_put(&n_hdlc->rx_free_buf_list, rbuf); + return ret; } /* end of n_hdlc_tty_read() */ diff --git a/drivers/tty/n_null.c b/drivers/tty/n_null.c index 96feabae4740..ce03ae78f5c6 100644 --- a/drivers/tty/n_null.c +++ b/drivers/tty/n_null.c @@ -20,7 +20,8 @@ static void n_null_close(struct tty_struct *tty) } static ssize_t n_null_read(struct tty_struct *tty, struct file *file, - unsigned char __user * buf, size_t nr) + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) { return -EOPNOTSUPP; } diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c index 934dd2fb2ec8..3161f0a535e3 100644 --- a/drivers/tty/n_r3964.c +++ b/drivers/tty/n_r3964.c @@ -129,7 +129,7 @@ static void remove_client_block(struct r3964_info *pInfo, static int r3964_open(struct tty_struct *tty); static void r3964_close(struct tty_struct *tty); static ssize_t r3964_read(struct tty_struct *tty, struct file *file, - unsigned char __user * buf, size_t nr); + void *cookie, unsigned char *buf, size_t nr); static ssize_t r3964_write(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr); static int r3964_ioctl(struct tty_struct *tty, struct file *file, @@ -1058,7 +1058,8 @@ static void r3964_close(struct tty_struct *tty) } static ssize_t r3964_read(struct tty_struct *tty, struct file *file, - unsigned char __user * buf, size_t nr) + unsigned char *kbuf, size_t nr, + void **cookie, unsigned long offset) { struct r3964_info *pInfo = tty->disc_data; struct r3964_client_info *pClient; @@ -1109,10 +1110,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file, kfree(pMsg); TRACE_M("r3964_read - msg kfree %p", pMsg); - if (copy_to_user(buf, &theMsg, ret)) { - ret = -EFAULT; - goto unlock; - } + memcpy(kbuf, &theMsg, ret); TRACE_PS("read - return %d", ret); goto unlock; diff --git a/drivers/tty/n_tracerouter.c b/drivers/tty/n_tracerouter.c index 4479af4d2fa5..3490ed51b1a3 100644 --- a/drivers/tty/n_tracerouter.c +++ b/drivers/tty/n_tracerouter.c @@ -118,7 +118,9 @@ static void n_tracerouter_close(struct tty_struct *tty) * -EINVAL */ static ssize_t n_tracerouter_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) { + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) +{ return -EINVAL; } diff --git a/drivers/tty/n_tracesink.c b/drivers/tty/n_tracesink.c index d96ba82cc356..1d9931041fd8 100644 --- a/drivers/tty/n_tracesink.c +++ b/drivers/tty/n_tracesink.c @@ -115,7 +115,9 @@ static void n_tracesink_close(struct tty_struct *tty) * -EINVAL */ static ssize_t n_tracesink_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) { + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) +{ return -EINVAL; } diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 219e85756171..dce33405ecdb 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -164,29 +164,24 @@ static void zero_buffer(struct tty_struct *tty, u8 *buffer, int size) memset(buffer, 0x00, size); } -static int tty_copy_to_user(struct tty_struct *tty, void __user *to, - size_t tail, size_t n) +static void tty_copy(struct tty_struct *tty, void *to, size_t tail, size_t n) { struct n_tty_data *ldata = tty->disc_data; size_t size = N_TTY_BUF_SIZE - tail; void *from = read_buf_addr(ldata, tail); - int uncopied; if (n > size) { tty_audit_add_data(tty, from, size); - uncopied = copy_to_user(to, from, size); - zero_buffer(tty, from, size - uncopied); - if (uncopied) - return uncopied; + memcpy(to, from, size); + zero_buffer(tty, from, size); to += size; n -= size; from = ldata->read_buf; } tty_audit_add_data(tty, from, n); - uncopied = copy_to_user(to, from, n); - zero_buffer(tty, from, n - uncopied); - return uncopied; + memcpy(to, from, n); + zero_buffer(tty, from, n); } /** @@ -1944,42 +1939,38 @@ static inline int input_available_p(struct tty_struct *tty, int poll) /** * copy_from_read_buf - copy read data directly * @tty: terminal device - * @b: user data + * @kbp: data * @nr: size of data * * Helper function to speed up n_tty_read. It is only called when - * ICANON is off; it copies characters straight from the tty queue to - * user space directly. It can be profitably called twice; once to - * drain the space from the tail pointer to the (physical) end of the - * buffer, and once to drain the space from the (physical) beginning of - * the buffer to head pointer. + * ICANON is off; it copies characters straight from the tty queue. * * Called under the ldata->atomic_read_lock sem * + * Returns true if it successfully copied data, but there is still + * more data to be had. + * * n_tty_read()/consumer path: * caller holds non-exclusive termios_rwsem * read_tail published */ -static int copy_from_read_buf(struct tty_struct *tty, - unsigned char __user **b, +static bool copy_from_read_buf(struct tty_struct *tty, + unsigned char **kbp, size_t *nr) { struct n_tty_data *ldata = tty->disc_data; - int retval; size_t n; bool is_eof; size_t head = smp_load_acquire(&ldata->commit_head); size_t tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1); - retval = 0; n = min(head - ldata->read_tail, N_TTY_BUF_SIZE - tail); n = min(*nr, n); if (n) { unsigned char *from = read_buf_addr(ldata, tail); - retval = copy_to_user(*b, from, n); - n -= retval; + memcpy(*kbp, from, n); is_eof = n == 1 && *from == EOF_CHAR(tty); tty_audit_add_data(tty, from, n); zero_buffer(tty, from, n); @@ -1987,22 +1978,25 @@ static int copy_from_read_buf(struct tty_struct *tty, /* Turn single EOF into zero-length read */ if (L_EXTPROC(tty) && ldata->icanon && is_eof && (head == ldata->read_tail)) - n = 0; - *b += n; + return false; + *kbp += n; *nr -= n; + + /* If we have more to copy, let the caller know */ + return head != ldata->read_tail; } - return retval; + return false; } /** * canon_copy_from_read_buf - copy read data in canonical mode * @tty: terminal device - * @b: user data + * @kbp: data * @nr: size of data * * Helper function for n_tty_read. It is only called when ICANON is on; * it copies one line of input up to and including the line-delimiting - * character into the user-space buffer. + * character into the result buffer. * * NB: When termios is changed from non-canonical to canonical mode and * the read buffer contains data, n_tty_set_termios() simulates an EOF @@ -2017,21 +2011,22 @@ static int copy_from_read_buf(struct tty_struct *tty, * read_tail published */ -static int canon_copy_from_read_buf(struct tty_struct *tty, - unsigned char __user **b, - size_t *nr) +static bool canon_copy_from_read_buf(struct tty_struct *tty, + unsigned char **kbp, + size_t *nr) { struct n_tty_data *ldata = tty->disc_data; size_t n, size, more, c; size_t eol; - size_t tail; - int ret, found = 0; + size_t tail, canon_head; + int found = 0; /* N.B. avoid overrun if nr == 0 */ if (!*nr) - return 0; + return false; - n = min(*nr + 1, smp_load_acquire(&ldata->canon_head) - ldata->read_tail); + canon_head = smp_load_acquire(&ldata->canon_head); + n = min(*nr + 1, canon_head - ldata->read_tail); tail = ldata->read_tail & (N_TTY_BUF_SIZE - 1); size = min_t(size_t, tail + n, N_TTY_BUF_SIZE); @@ -2061,10 +2056,8 @@ static int canon_copy_from_read_buf(struct tty_struct *tty, n_tty_trace("%s: eol:%zu found:%d n:%zu c:%zu tail:%zu more:%zu\n", __func__, eol, found, n, c, tail, more); - ret = tty_copy_to_user(tty, *b, tail, n); - if (ret) - return -EFAULT; - *b += n; + tty_copy(tty, *kbp, tail, n); + *kbp += n; *nr -= n; if (found) @@ -2077,8 +2070,11 @@ static int canon_copy_from_read_buf(struct tty_struct *tty, else ldata->push = 0; tty_audit_push(); + return false; } - return 0; + + /* No EOL found - do a continuation retry if there is more data */ + return ldata->read_tail != canon_head; } /** @@ -2129,10 +2125,11 @@ static int job_control(struct tty_struct *tty, struct file *file) */ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) + unsigned char *kbuf, size_t nr, + void **cookie, unsigned long offset) { struct n_tty_data *ldata = tty->disc_data; - unsigned char __user *b = buf; + unsigned char *kb = kbuf; DEFINE_WAIT_FUNC(wait, woken_wake_function); int c; int minimum, time; @@ -2141,6 +2138,30 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, int packet; size_t tail; + /* + * Is this a continuation of a read started earler? + * + * If so, we still hold the atomic_read_lock and the + * termios_rwsem, and can just continue to copy data. + */ + if (*cookie) { + if (ldata->icanon && !L_EXTPROC(tty)) { + if (canon_copy_from_read_buf(tty, &kb, &nr)) + return kb - kbuf; + } else { + if (copy_from_read_buf(tty, &kb, &nr)) + return kb - kbuf; + } + + /* No more data - release locks and stop retries */ + n_tty_kick_worker(tty); + n_tty_check_unthrottle(tty); + up_read(&tty->termios_rwsem); + mutex_unlock(&ldata->atomic_read_lock); + *cookie = NULL; + return kb - kbuf; + } + c = job_control(tty, file); if (c < 0) return c; @@ -2178,17 +2199,13 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, /* First test for status change. */ if (packet && tty->link->ctrl_status) { unsigned char cs; - if (b != buf) + if (kb != kbuf) break; spin_lock_irq(&tty->link->ctrl_lock); cs = tty->link->ctrl_status; tty->link->ctrl_status = 0; spin_unlock_irq(&tty->link->ctrl_lock); - if (put_user(cs, b)) { - retval = -EFAULT; - break; - } - b++; + *kb++ = cs; nr--; break; } @@ -2231,33 +2248,35 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, } if (ldata->icanon && !L_EXTPROC(tty)) { - retval = canon_copy_from_read_buf(tty, &b, &nr); - if (retval) - break; + if (canon_copy_from_read_buf(tty, &kb, &nr)) + goto more_to_be_read; } else { - int uncopied; - /* Deal with packet mode. */ - if (packet && b == buf) { - if (put_user(TIOCPKT_DATA, b)) { - retval = -EFAULT; - break; - } - b++; + if (packet && kb == kbuf) { + *kb++ = TIOCPKT_DATA; nr--; } - uncopied = copy_from_read_buf(tty, &b, &nr); - uncopied += copy_from_read_buf(tty, &b, &nr); - if (uncopied) { - retval = -EFAULT; - break; + /* + * Copy data, and if there is more to be had + * and we have nothing more to wait for, then + * let's mark us for retries. + * + * NOTE! We return here with both the termios_sem + * and atomic_read_lock still held, the retries + * will release them when done. + */ + if (copy_from_read_buf(tty, &kb, &nr) && kb - kbuf >= minimum) { +more_to_be_read: + remove_wait_queue(&tty->read_wait, &wait); + *cookie = cookie; + return kb - kbuf; } } n_tty_check_unthrottle(tty); - if (b - buf >= minimum) + if (kb - kbuf >= minimum) break; if (time) timeout = time; @@ -2269,8 +2288,8 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file, remove_wait_queue(&tty->read_wait, &wait); mutex_unlock(&ldata->atomic_read_lock); - if (b - buf) - retval = b - buf; + if (kb - kbuf) + retval = kb - kbuf; return retval; } diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index 9795b2e8b0b2..1b61d26bb7af 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1056,9 +1056,9 @@ static int max310x_startup(struct uart_port *port) max310x_port_update(port, MAX310X_MODE1_REG, MAX310X_MODE1_TRNSCVCTRL_BIT, 0); - /* Reset FIFOs */ - max310x_port_write(port, MAX310X_MODE2_REG, - MAX310X_MODE2_FIFORST_BIT); + /* Configure MODE2 register & Reset FIFOs*/ + val = MAX310X_MODE2_RXEMPTINV_BIT | MAX310X_MODE2_FIFORST_BIT; + max310x_port_write(port, MAX310X_MODE2_REG, val); max310x_port_update(port, MAX310X_MODE2_REG, MAX310X_MODE2_FIFORST_BIT, 0); @@ -1086,27 +1086,8 @@ static int max310x_startup(struct uart_port *port) /* Clear IRQ status register */ max310x_port_read(port, MAX310X_IRQSTS_REG); - /* - * Let's ask for an interrupt after a timeout equivalent to - * the receiving time of 4 characters after the last character - * has been received. - */ - max310x_port_write(port, MAX310X_RXTO_REG, 4); - - /* - * Make sure we also get RX interrupts when the RX FIFO is - * filling up quickly, so get an interrupt when half of the RX - * FIFO has been filled in. - */ - max310x_port_write(port, MAX310X_FIFOTRIGLVL_REG, - MAX310X_FIFOTRIGLVL_RX(MAX310X_FIFO_SIZE / 2)); - - /* Enable RX timeout interrupt in LSR */ - max310x_port_write(port, MAX310X_LSR_IRQEN_REG, - MAX310X_LSR_RXTO_BIT); - - /* Enable LSR, RX FIFO trigger, CTS change interrupts */ - val = MAX310X_IRQ_LSR_BIT | MAX310X_IRQ_RXFIFO_BIT | MAX310X_IRQ_TXEMPTY_BIT; + /* Enable RX, TX, CTS change interrupts */ + val = MAX310X_IRQ_RXEMPTY_BIT | MAX310X_IRQ_TXEMPTY_BIT; max310x_port_write(port, MAX310X_IRQEN_REG, val | MAX310X_IRQ_CTS_BIT); return 0; diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index f4de32d3f2af..6248304a001f 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -383,17 +383,18 @@ static void stm32_transmit_chars_dma(struct uart_port *port) DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); - if (!desc) { - for (i = count; i > 0; i--) - stm32_transmit_chars_pio(port); - return; - } + if (!desc) + goto fallback_err; desc->callback = stm32_tx_dma_complete; desc->callback_param = port; /* Push current DMA TX transaction in the pending queue */ - dmaengine_submit(desc); + if (dma_submit_error(dmaengine_submit(desc))) { + /* dma no yet started, safe to free resources */ + dmaengine_terminate_async(stm32port->tx_ch); + goto fallback_err; + } /* Issue pending DMA TX requests */ dma_async_issue_pending(stm32port->tx_ch); @@ -402,6 +403,11 @@ static void stm32_transmit_chars_dma(struct uart_port *port) xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1); port->icount.tx += count; + return; + +fallback_err: + for (i = count; i > 0; i--) + stm32_transmit_chars_pio(port); } static void stm32_transmit_chars(struct uart_port *port) @@ -1130,7 +1136,11 @@ static int stm32_of_dma_rx_probe(struct stm32_port *stm32port, desc->callback_param = NULL; /* Push current DMA transaction in the pending queue */ - dmaengine_submit(desc); + ret = dma_submit_error(dmaengine_submit(desc)); + if (ret) { + dmaengine_terminate_sync(stm32port->rx_ch); + goto config_err; + } /* Issue pending DMA requests */ dma_async_issue_pending(stm32port->rx_ch); diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 816e709afa56..5fd87941ac71 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -142,7 +142,7 @@ LIST_HEAD(tty_drivers); /* linked list of tty drivers */ /* Mutex to protect creating and releasing a tty */ DEFINE_MUTEX(tty_mutex); -static ssize_t tty_read(struct file *, char __user *, size_t, loff_t *); +static ssize_t tty_read(struct kiocb *, struct iov_iter *); static ssize_t tty_write(struct kiocb *, struct iov_iter *); static __poll_t tty_poll(struct file *, poll_table *); static int tty_open(struct inode *, struct file *); @@ -429,8 +429,7 @@ struct tty_driver *tty_find_polling_driver(char *name, int *line) EXPORT_SYMBOL_GPL(tty_find_polling_driver); #endif -static ssize_t hung_up_tty_read(struct file *file, char __user *buf, - size_t count, loff_t *ppos) +static ssize_t hung_up_tty_read(struct kiocb *iocb, struct iov_iter *to) { return 0; } @@ -473,8 +472,9 @@ static void tty_show_fdinfo(struct seq_file *m, struct file *file) static const struct file_operations tty_fops = { .llseek = no_llseek, - .read = tty_read, + .read_iter = tty_read, .write_iter = tty_write, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, @@ -487,8 +487,9 @@ static const struct file_operations tty_fops = { static const struct file_operations console_fops = { .llseek = no_llseek, - .read = tty_read, + .read_iter = tty_read, .write_iter = redirected_tty_write, + .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, .poll = tty_poll, .unlocked_ioctl = tty_ioctl, @@ -500,7 +501,7 @@ static const struct file_operations console_fops = { static const struct file_operations hung_up_tty_fops = { .llseek = no_llseek, - .read = hung_up_tty_read, + .read_iter = hung_up_tty_read, .write_iter = hung_up_tty_write, .poll = hung_up_tty_poll, .unlocked_ioctl = hung_up_tty_ioctl, @@ -829,6 +830,72 @@ static void tty_update_time(struct timespec64 *time) time->tv_sec = sec; } +/* + * Iterate on the ldisc ->read() function until we've gotten all + * the data the ldisc has for us. + * + * The "cookie" is something that the ldisc read function can fill + * in to let us know that there is more data to be had. + * + * We promise to continue to call the ldisc until it stops returning + * data or clears the cookie. The cookie may be something that the + * ldisc maintains state for and needs to free. + */ +static int iterate_tty_read(struct tty_ldisc *ld, struct tty_struct *tty, + struct file *file, struct iov_iter *to) +{ + int retval = 0; + void *cookie = NULL; + unsigned long offset = 0; + char kernel_buf[64]; + size_t count = iov_iter_count(to); + + do { + int size, copied; + + size = count > sizeof(kernel_buf) ? sizeof(kernel_buf) : count; + size = ld->ops->read(tty, file, kernel_buf, size, &cookie, offset); + if (!size) + break; + + if (size < 0) { + /* Did we have an earlier error (ie -EFAULT)? */ + if (retval) + break; + retval = size; + + /* + * -EOVERFLOW means we didn't have enough space + * for a whole packet, and we shouldn't return + * a partial result. + */ + if (retval == -EOVERFLOW) + offset = 0; + break; + } + + copied = copy_to_iter(kernel_buf, size, to); + offset += copied; + count -= copied; + + /* + * If the user copy failed, we still need to do another ->read() + * call if we had a cookie to let the ldisc clear up. + * + * But make sure size is zeroed. + */ + if (unlikely(copied != size)) { + count = 0; + retval = -EFAULT; + } + } while (cookie); + + /* We always clear tty buffer in case they contained passwords */ + memzero_explicit(kernel_buf, sizeof(kernel_buf)); + return offset ? offset : retval; +} + + /** * tty_read - read method for tty device files * @file: pointer to tty file @@ -844,10 +911,10 @@ static void tty_update_time(struct timespec64 *time) * read calls may be outstanding in parallel. */ -static ssize_t tty_read(struct file *file, char __user *buf, size_t count, - loff_t *ppos) +static ssize_t tty_read(struct kiocb *iocb, struct iov_iter *to) { int i; + struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); struct tty_struct *tty = file_tty(file); struct tty_ldisc *ld; @@ -861,11 +928,10 @@ static ssize_t tty_read(struct file *file, char __user *buf, size_t count, situation */ ld = tty_ldisc_ref_wait(tty); if (!ld) - return hung_up_tty_read(file, buf, count, ppos); + return hung_up_tty_read(iocb, to); + i = -EIO; if (ld->ops->read) - i = ld->ops->read(tty, file, buf, count); - else - i = -EIO; + i = iterate_tty_read(ld, tty, file, to); tty_ldisc_deref(ld); if (i > 0) @@ -962,11 +1028,14 @@ static inline ssize_t do_tty_write( if (ret <= 0) break; + written += ret; + if (ret > size) + break; + /* FIXME! Have Al check this! */ if (ret != size) iov_iter_revert(from, size-ret); - written += ret; count -= ret; if (!count) break; @@ -2884,7 +2953,7 @@ static long tty_compat_ioctl(struct file *file, unsigned int cmd, static int this_tty(const void *t, struct file *file, unsigned fd) { - if (likely(file->f_op->read != tty_read)) + if (likely(file->f_op->read_iter != tty_read)) return 0; return file_tty(file) != t ? 0 : fd + 1; } diff --git a/drivers/tty/vt/consolemap.c b/drivers/tty/vt/consolemap.c index f7d015c67963..d815ac98b39e 100644 --- a/drivers/tty/vt/consolemap.c +++ b/drivers/tty/vt/consolemap.c @@ -495,7 +495,7 @@ con_insert_unipair(struct uni_pagedir *p, u_short unicode, u_short fontpos) p2[unicode & 0x3f] = fontpos; - p->sum += (fontpos << 20) + unicode; + p->sum += (fontpos << 20U) + unicode; return 0; } diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 781905745812..2f4e5174e78c 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -1929,6 +1929,11 @@ static const struct usb_device_id acm_ids[] = { .driver_info = SEND_ZERO_PACKET, }, + /* Exclude Goodix Fingerprint Reader */ + { USB_DEVICE(0x27c6, 0x5395), + .driver_info = IGNORE_DEVICE, + }, + /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index c9f6e9758288..f27b4aecff3d 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -494,16 +494,24 @@ static int usblp_release(struct inode *inode, struct file *file) /* No kernel lock - fine */ static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait) { - __poll_t ret; + struct usblp *usblp = file->private_data; + __poll_t ret = 0; unsigned long flags; - struct usblp *usblp = file->private_data; /* Should we check file->f_mode & FMODE_WRITE before poll_wait()? */ poll_wait(file, &usblp->rwait, wait); poll_wait(file, &usblp->wwait, wait); + + mutex_lock(&usblp->mut); + if (!usblp->present) + ret |= EPOLLHUP; + mutex_unlock(&usblp->mut); + spin_lock_irqsave(&usblp->lock, flags); - ret = ((usblp->bidir && usblp->rcomplete) ? EPOLLIN | EPOLLRDNORM : 0) | - ((usblp->no_paper || usblp->wcomplete) ? EPOLLOUT | EPOLLWRNORM : 0); + if (usblp->bidir && usblp->rcomplete) + ret |= EPOLLIN | EPOLLRDNORM; + if (usblp->no_paper || usblp->wcomplete) + ret |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&usblp->lock, flags); return ret; } diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c index 1b4eb7046b07..6ade3daf7858 100644 --- a/drivers/usb/core/quirks.c +++ b/drivers/usb/core/quirks.c @@ -391,6 +391,9 @@ static const struct usb_device_id usb_quirk_list[] = { /* X-Rite/Gretag-Macbeth Eye-One Pro display colorimeter */ { USB_DEVICE(0x0971, 0x2000), .driver_info = USB_QUIRK_NO_SET_INTF }, + /* ELMO L-12F document camera */ + { USB_DEVICE(0x09a1, 0x0028), .driver_info = USB_QUIRK_DELAY_CTRL_MSG }, + /* Broadcom BCM92035DGROM BT dongle */ { USB_DEVICE(0x0a5c, 0x2021), .driver_info = USB_QUIRK_RESET_RESUME }, @@ -415,6 +418,9 @@ static const struct usb_device_id usb_quirk_list[] = { { USB_DEVICE(0x10d6, 0x2200), .driver_info = USB_QUIRK_STRING_FETCH_255 }, + /* novation SoundControl XL */ + { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, + /* Huawei 4G LTE module */ { USB_DEVICE(0x12d1, 0x15bb), .driver_info = USB_QUIRK_DISCONNECT_SUSPEND }, @@ -495,9 +501,6 @@ static const struct usb_device_id usb_quirk_list[] = { /* INTEL VALUE SSD */ { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME }, - /* novation SoundControl XL */ - { USB_DEVICE(0x1235, 0x0061), .driver_info = USB_QUIRK_RESET_RESUME }, - { } /* terminating entry must be last */ }; diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index 8f07b0516100..a566bb494e24 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -748,6 +748,38 @@ void usb_put_intf(struct usb_interface *intf) } EXPORT_SYMBOL_GPL(usb_put_intf); +/** + * usb_intf_get_dma_device - acquire a reference on the usb interface's DMA endpoint + * @intf: the usb interface + * + * While a USB device cannot perform DMA operations by itself, many USB + * controllers can. A call to usb_intf_get_dma_device() returns the DMA endpoint + * for the given USB interface, if any. The returned device structure must be + * released with put_device(). + * + * See also usb_get_dma_device(). + * + * Returns: A reference to the usb interface's DMA endpoint; or NULL if none + * exists. + */ +struct device *usb_intf_get_dma_device(struct usb_interface *intf) +{ + struct usb_device *udev = interface_to_usbdev(intf); + struct device *dmadev; + + if (!udev->bus) + return NULL; + + dmadev = get_device(udev->bus->sysdev); + if (!dmadev || !dmadev->dma_mask) { + put_device(dmadev); + return NULL; + } + + return dmadev; +} +EXPORT_SYMBOL_GPL(usb_intf_get_dma_device); + /* USB device locking * * USB devices and interfaces are locked using the semaphore in their diff --git a/drivers/usb/dwc2/hcd.c b/drivers/usb/dwc2/hcd.c index e9ac215b9663..fc3269f5faf1 100644 --- a/drivers/usb/dwc2/hcd.c +++ b/drivers/usb/dwc2/hcd.c @@ -1313,19 +1313,20 @@ static void dwc2_hc_start_transfer(struct dwc2_hsotg *hsotg, if (num_packets > max_hc_pkt_count) { num_packets = max_hc_pkt_count; chan->xfer_len = num_packets * chan->max_packet; + } else if (chan->ep_is_in) { + /* + * Always program an integral # of max packets + * for IN transfers. + * Note: This assumes that the input buffer is + * aligned and sized accordingly. + */ + chan->xfer_len = num_packets * chan->max_packet; } } else { /* Need 1 packet for transfer length of 0 */ num_packets = 1; } - if (chan->ep_is_in) - /* - * Always program an integral # of max packets for IN - * transfers - */ - chan->xfer_len = num_packets * chan->max_packet; - if (chan->ep_type == USB_ENDPOINT_XFER_INT || chan->ep_type == USB_ENDPOINT_XFER_ISOC) /* diff --git a/drivers/usb/dwc2/hcd_intr.c b/drivers/usb/dwc2/hcd_intr.c index a052d39b4375..d5f4ec1b73b1 100644 --- a/drivers/usb/dwc2/hcd_intr.c +++ b/drivers/usb/dwc2/hcd_intr.c @@ -500,7 +500,7 @@ static int dwc2_update_urb_state(struct dwc2_hsotg *hsotg, &short_read); if (urb->actual_length + xfer_length > urb->length) { - dev_warn(hsotg->dev, "%s(): trimming xfer length\n", __func__); + dev_dbg(hsotg->dev, "%s(): trimming xfer length\n", __func__); xfer_length = urb->length - urb->actual_length; } @@ -1977,6 +1977,18 @@ static void dwc2_hc_chhltd_intr_dma(struct dwc2_hsotg *hsotg, qtd->error_count++; dwc2_update_urb_state_abn(hsotg, chan, chnum, qtd->urb, qtd, DWC2_HC_XFER_XACT_ERR); + /* + * We can get here after a completed transaction + * (urb->actual_length >= urb->length) which was not reported + * as completed. If that is the case, and we do not abort + * the transfer, a transfer of size 0 will be enqueued + * subsequently. If urb->actual_length is not DMA-aligned, + * the buffer will then point to an unaligned address, and + * the resulting behavior is undefined. Bail out in that + * situation. + */ + if (qtd->urb->actual_length >= qtd->urb->length) + qtd->error_count = 3; dwc2_hcd_save_data_toggle(hsotg, chan, chnum, qtd); dwc2_halt_channel(hsotg, chan, qtd, DWC2_HC_XFER_XACT_ERR); } diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c index c703d552bbcf..c00c4fa139b8 100644 --- a/drivers/usb/dwc3/dwc3-qcom.c +++ b/drivers/usb/dwc3/dwc3-qcom.c @@ -60,12 +60,14 @@ struct dwc3_acpi_pdata { int dp_hs_phy_irq_index; int dm_hs_phy_irq_index; int ss_phy_irq_index; + bool is_urs; }; struct dwc3_qcom { struct device *dev; void __iomem *qscratch_base; struct platform_device *dwc3; + struct platform_device *urs_usb; struct clk **clks; int num_clocks; struct reset_control *resets; @@ -356,8 +358,10 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom) if (ret) dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret); + if (device_may_wakeup(qcom->dev)) + dwc3_qcom_enable_interrupts(qcom); + qcom->is_suspended = true; - dwc3_qcom_enable_interrupts(qcom); return 0; } @@ -370,7 +374,8 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom) if (!qcom->is_suspended) return 0; - dwc3_qcom_disable_interrupts(qcom); + if (device_may_wakeup(qcom->dev)) + dwc3_qcom_disable_interrupts(qcom); for (i = 0; i < qcom->num_clocks; i++) { ret = clk_prepare_enable(qcom->clks[i]); @@ -429,13 +434,15 @@ static void dwc3_qcom_select_utmi_clk(struct dwc3_qcom *qcom) static int dwc3_qcom_get_irq(struct platform_device *pdev, const char *name, int num) { + struct dwc3_qcom *qcom = platform_get_drvdata(pdev); + struct platform_device *pdev_irq = qcom->urs_usb ? qcom->urs_usb : pdev; struct device_node *np = pdev->dev.of_node; int ret; if (np) - ret = platform_get_irq_byname(pdev, name); + ret = platform_get_irq_byname(pdev_irq, name); else - ret = platform_get_irq(pdev, num); + ret = platform_get_irq(pdev_irq, num); return ret; } @@ -568,6 +575,8 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) struct dwc3_qcom *qcom = platform_get_drvdata(pdev); struct device *dev = &pdev->dev; struct resource *res, *child_res = NULL; + struct platform_device *pdev_irq = qcom->urs_usb ? qcom->urs_usb : + pdev; int irq; int ret; @@ -597,7 +606,7 @@ static int dwc3_qcom_acpi_register_core(struct platform_device *pdev) child_res[0].end = child_res[0].start + qcom->acpi_pdata->dwc3_core_base_size; - irq = platform_get_irq(pdev, 0); + irq = platform_get_irq(pdev_irq, 0); child_res[1].flags = IORESOURCE_IRQ; child_res[1].start = child_res[1].end = irq; @@ -639,16 +648,46 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev) ret = of_platform_populate(np, NULL, NULL, dev); if (ret) { dev_err(dev, "failed to register dwc3 core - %d\n", ret); - return ret; + goto node_put; } qcom->dwc3 = of_find_device_by_node(dwc3_np); if (!qcom->dwc3) { + ret = -ENODEV; dev_err(dev, "failed to get dwc3 platform device\n"); - return -ENODEV; } - return 0; +node_put: + of_node_put(dwc3_np); + + return ret; +} + +static struct platform_device * +dwc3_qcom_create_urs_usb_platdev(struct device *dev) +{ + struct fwnode_handle *fwh; + struct acpi_device *adev; + char name[8]; + int ret; + int id; + + /* Figure out device id */ + ret = sscanf(fwnode_get_name(dev->fwnode), "URS%d", &id); + if (!ret) + return NULL; + + /* Find the child using name */ + snprintf(name, sizeof(name), "USB%d", id); + fwh = fwnode_get_named_child_node(dev->fwnode, name); + if (!fwh) + return NULL; + + adev = to_acpi_device_node(fwh); + if (!adev) + return NULL; + + return acpi_create_platform_device(adev, NULL); } static int dwc3_qcom_probe(struct platform_device *pdev) @@ -715,6 +754,14 @@ static int dwc3_qcom_probe(struct platform_device *pdev) qcom->acpi_pdata->qscratch_base_offset; parent_res->end = parent_res->start + qcom->acpi_pdata->qscratch_base_size; + + if (qcom->acpi_pdata->is_urs) { + qcom->urs_usb = dwc3_qcom_create_urs_usb_platdev(dev); + if (!qcom->urs_usb) { + dev_err(dev, "failed to create URS USB platdev\n"); + return -ENODEV; + } + } } qcom->qscratch_base = devm_ioremap_resource(dev, parent_res); @@ -877,8 +924,22 @@ static const struct dwc3_acpi_pdata sdm845_acpi_pdata = { .ss_phy_irq_index = 2 }; +static const struct dwc3_acpi_pdata sdm845_acpi_urs_pdata = { + .qscratch_base_offset = SDM845_QSCRATCH_BASE_OFFSET, + .qscratch_base_size = SDM845_QSCRATCH_SIZE, + .dwc3_core_base_size = SDM845_DWC3_CORE_SIZE, + .hs_phy_irq_index = 1, + .dp_hs_phy_irq_index = 4, + .dm_hs_phy_irq_index = 3, + .ss_phy_irq_index = 2, + .is_urs = true, +}; + static const struct acpi_device_id dwc3_qcom_acpi_match[] = { { "QCOM2430", (unsigned long)&sdm845_acpi_pdata }, + { "QCOM0304", (unsigned long)&sdm845_acpi_urs_pdata }, + { "QCOM0497", (unsigned long)&sdm845_acpi_urs_pdata }, + { "QCOM04A6", (unsigned long)&sdm845_acpi_pdata }, { }, }; MODULE_DEVICE_TABLE(acpi, dwc3_qcom_acpi_match); diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c index ee44321fee38..2a86ad4b12b3 100644 --- a/drivers/usb/dwc3/gadget.c +++ b/drivers/usb/dwc3/gadget.c @@ -605,8 +605,23 @@ static int dwc3_gadget_set_ep_config(struct dwc3_ep *dep, unsigned int action) params.param0 |= DWC3_DEPCFG_FIFO_NUMBER(dep->number >> 1); if (desc->bInterval) { - params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(desc->bInterval - 1); - dep->interval = 1 << (desc->bInterval - 1); + u8 bInterval_m1; + + /* + * Valid range for DEPCFG.bInterval_m1 is from 0 to 13, and it + * must be set to 0 when the controller operates in full-speed. + */ + bInterval_m1 = min_t(u8, desc->bInterval - 1, 13); + if (dwc->gadget->speed == USB_SPEED_FULL) + bInterval_m1 = 0; + + if (usb_endpoint_type(desc) == USB_ENDPOINT_XFER_INT && + dwc->gadget->speed == USB_SPEED_FULL) + dep->interval = desc->bInterval; + else + dep->interval = 1 << (desc->bInterval - 1); + + params.param1 |= DWC3_DEPCFG_BINTERVAL_M1(bInterval_m1); } return dwc3_send_gadget_ep_cmd(dep, DWC3_DEPCMD_SETEPCONFIG, ¶ms); @@ -768,8 +783,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) trace_dwc3_gadget_ep_disable(dep); - dwc3_remove_requests(dwc, dep); - /* make sure HW endpoint isn't stalled */ if (dep->flags & DWC3_EP_STALL) __dwc3_gadget_ep_set_halt(dep, 0, false); @@ -788,6 +801,8 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep) dep->endpoint.desc = NULL; } + dwc3_remove_requests(dwc, dep); + return 0; } @@ -1602,7 +1617,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req) { struct dwc3 *dwc = dep->dwc; - if (!dep->endpoint.desc || !dwc->pullups_connected) { + if (!dep->endpoint.desc || !dwc->pullups_connected || !dwc->connected) { dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n", dep->name); return -ESHUTDOWN; @@ -2110,6 +2125,17 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) } } + /* + * Check the return value for successful resume, or error. For a + * successful resume, the DWC3 runtime PM resume routine will handle + * the run stop sequence, so avoid duplicate operations here. + */ + ret = pm_runtime_get_sync(dwc->dev); + if (!ret || ret < 0) { + pm_runtime_put(dwc->dev); + return 0; + } + /* * Synchronize any pending event handling before executing the controller * halt routine. @@ -2124,6 +2150,7 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) if (!is_on) { u32 count; + dwc->connected = false; /* * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a * Section 4.1.8 Table 4-7, it states that for a device-initiated @@ -2154,6 +2181,7 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on) ret = dwc3_gadget_run_stop(dwc, is_on, false); spin_unlock_irqrestore(&dwc->lock, flags); + pm_runtime_put(dwc->dev); return ret; } @@ -3239,8 +3267,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) { u32 reg; - dwc->connected = true; - /* * WORKAROUND: DWC3 revisions <1.88a have an issue which * would cause a missing Disconnect Event if there's a @@ -3280,6 +3306,7 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc) * transfers." */ dwc3_stop_active_transfers(dwc); + dwc->connected = true; reg = dwc3_readl(dwc->regs, DWC3_DCTL); reg &= ~DWC3_DCTL_TSTCTRL_MASK; diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c index 36ffb43f9c1a..9b7fa53d6642 100644 --- a/drivers/usb/gadget/configfs.c +++ b/drivers/usb/gadget/configfs.c @@ -97,6 +97,8 @@ struct gadget_config_name { struct list_head list; }; +#define USB_MAX_STRING_WITH_NULL_LEN (USB_MAX_STRING_LEN+1) + static int usb_string_copy(const char *s, char **s_copy) { int ret; @@ -106,12 +108,16 @@ static int usb_string_copy(const char *s, char **s_copy) if (ret > USB_MAX_STRING_LEN) return -EOVERFLOW; - str = kstrdup(s, GFP_KERNEL); - if (!str) - return -ENOMEM; + if (copy) { + str = copy; + } else { + str = kmalloc(USB_MAX_STRING_WITH_NULL_LEN, GFP_KERNEL); + if (!str) + return -ENOMEM; + } + strcpy(str, s); if (str[ret - 1] == '\n') str[ret - 1] = '\0'; - kfree(copy); *s_copy = str; return 0; } diff --git a/drivers/usb/gadget/function/f_uac1.c b/drivers/usb/gadget/function/f_uac1.c index 00d346965f7a..560382e0a8f3 100644 --- a/drivers/usb/gadget/function/f_uac1.c +++ b/drivers/usb/gadget/function/f_uac1.c @@ -499,6 +499,7 @@ static void f_audio_disable(struct usb_function *f) uac1->as_out_alt = 0; uac1->as_in_alt = 0; + u_audio_stop_playback(&uac1->g_audio); u_audio_stop_capture(&uac1->g_audio); } diff --git a/drivers/usb/gadget/function/f_uac2.c b/drivers/usb/gadget/function/f_uac2.c index 5d960b6603b6..6f03e944e0e3 100644 --- a/drivers/usb/gadget/function/f_uac2.c +++ b/drivers/usb/gadget/function/f_uac2.c @@ -478,7 +478,7 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts, } max_size_bw = num_channels(chmask) * ssize * - DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1))); + ((srate / (factor / (1 << (ep_desc->bInterval - 1)))) + 1); ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw, max_size_ep)); diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index e6d32c536781..908e49dafd62 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -89,7 +89,12 @@ static void u_audio_iso_complete(struct usb_ep *ep, struct usb_request *req) struct snd_uac_chip *uac = prm->uac; /* i/f shutting down */ - if (!prm->ep_enabled || req->status == -ESHUTDOWN) + if (!prm->ep_enabled) { + usb_ep_free_request(ep, req); + return; + } + + if (req->status == -ESHUTDOWN) return; /* @@ -336,8 +341,14 @@ static inline void free_ep(struct uac_rtd_params *prm, struct usb_ep *ep) for (i = 0; i < params->req_number; i++) { if (prm->ureq[i].req) { - usb_ep_dequeue(ep, prm->ureq[i].req); - usb_ep_free_request(ep, prm->ureq[i].req); + if (usb_ep_dequeue(ep, prm->ureq[i].req)) + usb_ep_free_request(ep, prm->ureq[i].req); + /* + * If usb_ep_dequeue() cannot successfully dequeue the + * request, the request will be freed by the completion + * callback. + */ + prm->ureq[i].req = NULL; } } diff --git a/drivers/usb/gadget/function/u_ether_configfs.h b/drivers/usb/gadget/function/u_ether_configfs.h index bd92b5703013..f982e18a5a78 100644 --- a/drivers/usb/gadget/function/u_ether_configfs.h +++ b/drivers/usb/gadget/function/u_ether_configfs.h @@ -169,12 +169,11 @@ out: \ size_t len) \ { \ struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item); \ - int ret; \ + int ret = -EINVAL; \ u8 val; \ \ mutex_lock(&opts->lock); \ - ret = sscanf(page, "%02hhx", &val); \ - if (ret > 0) { \ + if (sscanf(page, "%02hhx", &val) > 0) { \ opts->_n_ = val; \ ret = len; \ } \ diff --git a/drivers/usb/gadget/udc/s3c2410_udc.c b/drivers/usb/gadget/udc/s3c2410_udc.c index f1ea51476add..1d3ebb07ccd4 100644 --- a/drivers/usb/gadget/udc/s3c2410_udc.c +++ b/drivers/usb/gadget/udc/s3c2410_udc.c @@ -1773,8 +1773,8 @@ static int s3c2410_udc_probe(struct platform_device *pdev) udc_info = dev_get_platdata(&pdev->dev); base_addr = devm_platform_ioremap_resource(pdev, 0); - if (!base_addr) { - retval = -ENOMEM; + if (IS_ERR(base_addr)) { + retval = PTR_ERR(base_addr); goto err_mem; } diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c index 84da8406d5b4..5bbccc9a0179 100644 --- a/drivers/usb/host/xhci-pci.c +++ b/drivers/usb/host/xhci-pci.c @@ -66,6 +66,7 @@ #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI 0x1142 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI 0x1242 #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI 0x2142 +#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI 0x3242 static const char hcd_name[] = "xhci_hcd"; @@ -276,11 +277,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI) xhci->quirks |= XHCI_BROKEN_STREAMS; if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && - pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) + pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) { xhci->quirks |= XHCI_TRUST_TX_LENGTH; + xhci->quirks |= XHCI_NO_64BIT_SUPPORT; + } if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && (pdev->device == PCI_DEVICE_ID_ASMEDIA_1142_XHCI || - pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI)) + pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI || + pdev->device == PCI_DEVICE_ID_ASMEDIA_3242_XHCI)) xhci->quirks |= XHCI_NO_64BIT_SUPPORT; if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA && @@ -295,6 +299,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci) pdev->device == 0x9026) xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT; + if (pdev->vendor == PCI_VENDOR_ID_AMD && + (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2 || + pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4)) + xhci->quirks |= XHCI_NO_SOFT_RETRY; + if (xhci->quirks & XHCI_RESET_ON_RESUME) xhci_dbg_trace(xhci, trace_xhci_dbg_quirks, "QUIRK: Resetting on resume"); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 89c3be9917f6..02ea65db80f3 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2307,7 +2307,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td, remaining = 0; break; case COMP_USB_TRANSACTION_ERROR: - if ((ep_ring->err_count++ > MAX_SOFT_RETRY) || + if (xhci->quirks & XHCI_NO_SOFT_RETRY || + (ep_ring->err_count++ > MAX_SOFT_RETRY) || le32_to_cpu(slot_ctx->tt_info) & TT_SLOT) break; *status = 0; diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 345a221028c6..fd84ca7534e0 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -883,44 +883,42 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci) xhci_set_cmd_ring_deq(xhci); } -static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) +/* + * Disable port wake bits if do_wakeup is not set. + * + * Also clear a possible internal port wake state left hanging for ports that + * detected termination but never successfully enumerated (trained to 0U). + * Internal wake causes immediate xHCI wake after suspend. PORT_CSC write done + * at enumeration clears this wake, force one here as well for unconnected ports + */ + +static void xhci_disable_hub_port_wake(struct xhci_hcd *xhci, + struct xhci_hub *rhub, + bool do_wakeup) { - struct xhci_port **ports; - int port_index; unsigned long flags; u32 t1, t2, portsc; + int i; spin_lock_irqsave(&xhci->lock, flags); - /* disable usb3 ports Wake bits */ - port_index = xhci->usb3_rhub.num_ports; - ports = xhci->usb3_rhub.ports; - while (port_index--) { - t1 = readl(ports[port_index]->addr); - portsc = t1; - t1 = xhci_port_state_to_neutral(t1); - t2 = t1 & ~PORT_WAKE_BITS; - if (t1 != t2) { - writel(t2, ports[port_index]->addr); - xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n", - xhci->usb3_rhub.hcd->self.busnum, - port_index + 1, portsc, t2); - } - } + for (i = 0; i < rhub->num_ports; i++) { + portsc = readl(rhub->ports[i]->addr); + t1 = xhci_port_state_to_neutral(portsc); + t2 = t1; + + /* clear wake bits if do_wake is not set */ + if (!do_wakeup) + t2 &= ~PORT_WAKE_BITS; + + /* Don't touch csc bit if connected or connect change is set */ + if (!(portsc & (PORT_CSC | PORT_CONNECT))) + t2 |= PORT_CSC; - /* disable usb2 ports Wake bits */ - port_index = xhci->usb2_rhub.num_ports; - ports = xhci->usb2_rhub.ports; - while (port_index--) { - t1 = readl(ports[port_index]->addr); - portsc = t1; - t1 = xhci_port_state_to_neutral(t1); - t2 = t1 & ~PORT_WAKE_BITS; if (t1 != t2) { - writel(t2, ports[port_index]->addr); - xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n", - xhci->usb2_rhub.hcd->self.busnum, - port_index + 1, portsc, t2); + writel(t2, rhub->ports[i]->addr); + xhci_dbg(xhci, "config port %d-%d wake bits, portsc: 0x%x, write: 0x%x\n", + rhub->hcd->self.busnum, i + 1, portsc, t2); } } spin_unlock_irqrestore(&xhci->lock, flags); @@ -983,8 +981,8 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup) return -EINVAL; /* Clear root port wake on bits if wakeup not allowed. */ - if (!do_wakeup) - xhci_disable_port_wake_on_bits(xhci); + xhci_disable_hub_port_wake(xhci, &xhci->usb3_rhub, do_wakeup); + xhci_disable_hub_port_wake(xhci, &xhci->usb2_rhub, do_wakeup); if (!HCD_HW_ACCESSIBLE(hcd)) return 0; @@ -1088,6 +1086,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) struct usb_hcd *secondary_hcd; int retval = 0; bool comp_timer_running = false; + bool pending_portevent = false; if (!hcd->state) return 0; @@ -1226,13 +1225,22 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated) done: if (retval == 0) { - /* Resume root hubs only when have pending events. */ - if (xhci_pending_portevent(xhci)) { + /* + * Resume roothubs only if there are pending events. + * USB 3 devices resend U3 LFPS wake after a 100ms delay if + * the first wake signalling failed, give it that chance. + */ + pending_portevent = xhci_pending_portevent(xhci); + if (!pending_portevent) { + msleep(120); + pending_portevent = xhci_pending_portevent(xhci); + } + + if (pending_portevent) { usb_hcd_resume_root_hub(xhci->shared_hcd); usb_hcd_resume_root_hub(hcd); } } - /* * If system is subject to the Quirk, Compliance Mode Timer needs to * be re-initialized Always after a system resume. Ports are subject diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 07ff95016f11..3190fd570c57 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -1883,6 +1883,7 @@ struct xhci_hcd { #define XHCI_SKIP_PHY_INIT BIT_ULL(37) #define XHCI_DISABLE_SPARSE BIT_ULL(38) #define XHCI_SG_TRB_CACHE_SIZE_QUIRK BIT_ULL(39) +#define XHCI_NO_SOFT_RETRY BIT_ULL(40) unsigned int num_active_eps; unsigned int limit_active_eps; diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c index 849e0b770130..1cd87729ba60 100644 --- a/drivers/usb/musb/musb_core.c +++ b/drivers/usb/musb/musb_core.c @@ -2240,32 +2240,35 @@ int musb_queue_resume_work(struct musb *musb, { struct musb_pending_work *w; unsigned long flags; + bool is_suspended; int error; if (WARN_ON(!callback)) return -EINVAL; - if (pm_runtime_active(musb->controller)) - return callback(musb, data); + spin_lock_irqsave(&musb->list_lock, flags); + is_suspended = musb->is_runtime_suspended; + + if (is_suspended) { + w = devm_kzalloc(musb->controller, sizeof(*w), GFP_ATOMIC); + if (!w) { + error = -ENOMEM; + goto out_unlock; + } - w = devm_kzalloc(musb->controller, sizeof(*w), GFP_ATOMIC); - if (!w) - return -ENOMEM; + w->callback = callback; + w->data = data; - w->callback = callback; - w->data = data; - spin_lock_irqsave(&musb->list_lock, flags); - if (musb->is_runtime_suspended) { list_add_tail(&w->node, &musb->pending_list); error = 0; - } else { - dev_err(musb->controller, "could not add resume work %p\n", - callback); - devm_kfree(musb->controller, w); - error = -EINPROGRESS; } + +out_unlock: spin_unlock_irqrestore(&musb->list_lock, flags); + if (!is_suspended) + error = callback(musb, data); + return error; } EXPORT_SYMBOL_GPL(musb_queue_resume_work); diff --git a/drivers/usb/renesas_usbhs/pipe.c b/drivers/usb/renesas_usbhs/pipe.c index e7334b7fb3a6..75fff2e4cbc6 100644 --- a/drivers/usb/renesas_usbhs/pipe.c +++ b/drivers/usb/renesas_usbhs/pipe.c @@ -746,6 +746,8 @@ struct usbhs_pipe *usbhs_pipe_malloc(struct usbhs_priv *priv, void usbhs_pipe_free(struct usbhs_pipe *pipe) { + usbhsp_pipe_select(pipe); + usbhsp_pipe_cfg_set(pipe, 0xFFFF, 0); usbhsp_put_pipe(pipe); } diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 28deaaec581f..f26861246f65 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -86,6 +86,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1a86, 0x7522) }, { USB_DEVICE(0x1a86, 0x7523) }, { USB_DEVICE(0x4348, 0x5523) }, + { USB_DEVICE(0x9986, 0x7523) }, { }, }; MODULE_DEVICE_TABLE(usb, id_table); diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index 7bec1e730b20..6947d5f4cb5e 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -146,6 +146,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */ { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */ { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */ + { USB_DEVICE(0x10C4, 0x88D8) }, /* Acuity Brands nLight Air Adapter */ { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */ { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */ { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */ @@ -202,6 +203,8 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */ { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */ { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */ + { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */ + { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */ { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */ { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */ { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */ diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 94398f89e600..4168801b9595 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -1386,8 +1386,9 @@ static int change_speed(struct tty_struct *tty, struct usb_serial_port *port) index_value = get_ftdi_divisor(tty, port); value = (u16)index_value; index = (u16)(index_value >> 16); - if ((priv->chip_type == FT2232C) || (priv->chip_type == FT2232H) || - (priv->chip_type == FT4232H) || (priv->chip_type == FT232H)) { + if (priv->chip_type == FT2232C || priv->chip_type == FT2232H || + priv->chip_type == FT4232H || priv->chip_type == FT232H || + priv->chip_type == FTX) { /* Probably the BM type needs the MSB of the encoded fractional * divider also moved like for the chips above. Any infos? */ index = (u16)((index << 8) | priv->interface); diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c index ba5d8df69518..4b48ef4adbeb 100644 --- a/drivers/usb/serial/io_edgeport.c +++ b/drivers/usb/serial/io_edgeport.c @@ -3003,26 +3003,32 @@ static int edge_startup(struct usb_serial *serial) response = -ENODEV; } - usb_free_urb(edge_serial->interrupt_read_urb); - kfree(edge_serial->interrupt_in_buffer); - - usb_free_urb(edge_serial->read_urb); - kfree(edge_serial->bulk_in_buffer); - - kfree(edge_serial); - - return response; + goto error; } /* start interrupt read for this edgeport this interrupt will * continue as long as the edgeport is connected */ response = usb_submit_urb(edge_serial->interrupt_read_urb, GFP_KERNEL); - if (response) + if (response) { dev_err(ddev, "%s - Error %d submitting control urb\n", __func__, response); + + goto error; + } } return response; + +error: + usb_free_urb(edge_serial->interrupt_read_urb); + kfree(edge_serial->interrupt_in_buffer); + + usb_free_urb(edge_serial->read_urb); + kfree(edge_serial->bulk_in_buffer); + + kfree(edge_serial); + + return response; } diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index 41ee2984a0df..785e97581927 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -1092,8 +1092,10 @@ static int mos7720_write(struct tty_struct *tty, struct usb_serial_port *port, if (urb->transfer_buffer == NULL) { urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_ATOMIC); - if (!urb->transfer_buffer) + if (!urb->transfer_buffer) { + bytes_sent = -ENOMEM; goto exit; + } } transfer_size = min(count, URB_TRANSFER_BUFFER_SIZE); diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c index 23f91d658cb4..30c25ef0dacd 100644 --- a/drivers/usb/serial/mos7840.c +++ b/drivers/usb/serial/mos7840.c @@ -883,8 +883,10 @@ static int mos7840_write(struct tty_struct *tty, struct usb_serial_port *port, if (urb->transfer_buffer == NULL) { urb->transfer_buffer = kmalloc(URB_TRANSFER_BUFFER_SIZE, GFP_ATOMIC); - if (!urb->transfer_buffer) + if (!urb->transfer_buffer) { + bytes_sent = -ENOMEM; goto exit; + } } transfer_size = min(count, URB_TRANSFER_BUFFER_SIZE); diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 2049e66f34a3..c6969ca72839 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1569,7 +1569,8 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1272, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1273, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1274, 0xff, 0xff, 0xff) }, - { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1275, 0xff, 0xff, 0xff) }, + { USB_DEVICE(ZTE_VENDOR_ID, 0x1275), /* ZTE P685M */ + .driver_info = RSVD(3) | RSVD(4) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1276, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1277, 0xff, 0xff, 0xff) }, { USB_DEVICE_AND_INTERFACE_INFO(ZTE_VENDOR_ID, 0x1278, 0xff, 0xff, 0xff) }, diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c index be8067017eaa..29dda60e3bcd 100644 --- a/drivers/usb/serial/pl2303.c +++ b/drivers/usb/serial/pl2303.c @@ -183,6 +183,7 @@ struct pl2303_type_data { speed_t max_baud_rate; unsigned long quirks; unsigned int no_autoxonxoff:1; + unsigned int no_divisors:1; }; struct pl2303_serial_private { @@ -209,6 +210,7 @@ static const struct pl2303_type_data pl2303_type_data[TYPE_COUNT] = { }, [TYPE_HXN] = { .max_baud_rate = 12000000, + .no_divisors = true, }, }; @@ -571,8 +573,12 @@ static void pl2303_encode_baud_rate(struct tty_struct *tty, baud = min_t(speed_t, baud, spriv->type->max_baud_rate); /* * Use direct method for supported baud rates, otherwise use divisors. + * Newer chip types do not support divisor encoding. */ - baud_sup = pl2303_get_supported_baud_rate(baud); + if (spriv->type->no_divisors) + baud_sup = baud; + else + baud_sup = pl2303_get_supported_baud_rate(baud); if (baud == baud_sup) baud = pl2303_encode_baud_rate_direct(buf, baud); diff --git a/drivers/usb/storage/transport.c b/drivers/usb/storage/transport.c index 5eb895b19c55..f4304ce69350 100644 --- a/drivers/usb/storage/transport.c +++ b/drivers/usb/storage/transport.c @@ -656,6 +656,13 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us) need_auto_sense = 1; } + /* Some devices (Kindle) require another command after SYNC CACHE */ + if ((us->fflags & US_FL_SENSE_AFTER_SYNC) && + srb->cmnd[0] == SYNCHRONIZE_CACHE) { + usb_stor_dbg(us, "-- sense after SYNC CACHE\n"); + need_auto_sense = 1; + } + /* * If we have a failure, we're going to do a REQUEST_SENSE * automatically. Note that we differentiate between a command diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h index 5732e9691f08..efa972be2ee3 100644 --- a/drivers/usb/storage/unusual_devs.h +++ b/drivers/usb/storage/unusual_devs.h @@ -2211,6 +2211,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200, USB_SC_DEVICE, USB_PR_DEVICE, NULL, US_FL_NO_READ_DISC_INFO ), +/* + * Reported by Matthias Schwarzott + * The Amazon Kindle treats SYNCHRONIZE CACHE as an indication that + * the host may be finished with it, and automatically ejects its + * emulated media unless it receives another command within one second. + */ +UNUSUAL_DEV( 0x1949, 0x0004, 0x0000, 0x9999, + "Amazon", + "Kindle", + USB_SC_DEVICE, USB_PR_DEVICE, NULL, + US_FL_SENSE_AFTER_SYNC ), + /* * Reported by Oliver Neukum * This device morphes spontaneously into another device if the access diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 22a85b396f69..3cd4859ffab5 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -797,6 +797,7 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv) port->supply_voltage = mv; port->current_limit = max_ma; + power_supply_changed(port->psy); if (port->tcpc->set_current_limit) ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv); @@ -2345,6 +2346,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = false; port->usb_type = POWER_SUPPLY_USB_TYPE_PD; + power_supply_changed(port->psy); /* * Select the source PDO providing the most power which has a @@ -2369,6 +2371,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo, port->pps_data.supported = true; port->usb_type = POWER_SUPPLY_USB_TYPE_PD_PPS; + power_supply_changed(port->psy); } continue; default: @@ -2526,6 +2529,7 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port) port->pps_data.out_volt)); port->pps_data.op_curr = min(port->pps_data.max_curr, port->pps_data.op_curr); + power_supply_changed(port->psy); } return src_pdo; @@ -2761,6 +2765,7 @@ static int tcpm_set_charge(struct tcpm_port *port, bool charge) return ret; } port->vbus_charge = charge; + power_supply_changed(port->psy); return 0; } @@ -2935,6 +2940,7 @@ static void tcpm_reset_port(struct tcpm_port *port) port->try_src_count = 0; port->try_snk_count = 0; port->usb_type = POWER_SUPPLY_USB_TYPE_C; + power_supply_changed(port->psy); port->nr_sink_caps = 0; port->sink_cap_done = false; if (port->tcpc->enable_frs) @@ -5129,7 +5135,7 @@ static int tcpm_psy_set_prop(struct power_supply *psy, ret = -EINVAL; break; } - + power_supply_changed(port->psy); return ret; } @@ -5281,6 +5287,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc) err = devm_tcpm_psy_register(port); if (err) goto out_role_sw_put; + power_supply_changed(port->psy); port->typec_port = typec_register_port(port->dev, &port->typec_caps); if (IS_ERR(port->typec_port)) { diff --git a/drivers/usb/typec/tps6598x.c b/drivers/usb/typec/tps6598x.c index 6e6ef6317523..29bd1c5a283c 100644 --- a/drivers/usb/typec/tps6598x.c +++ b/drivers/usb/typec/tps6598x.c @@ -64,7 +64,6 @@ enum { struct tps6598x_rx_identity_reg { u8 status; struct usb_pd_identity identity; - u32 vdo[3]; } __packed; /* Standard Task return codes */ diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index 2305d425e6c9..8f1de1fbbeed 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -46,6 +46,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a int sockfd = 0; struct socket *socket; int rv; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; if (!sdev) { dev_err(dev, "sdev is null\n"); @@ -69,23 +71,47 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a } socket = sockfd_lookup(sockfd, &err); - if (!socket) + if (!socket) { + dev_err(dev, "failed to lookup sock"); goto err; + } - sdev->ud.tcp_socket = socket; - sdev->ud.sockfd = sockfd; + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + goto sock_err; + } + /* unlock and create threads and get tasks */ spin_unlock_irq(&sdev->ud.lock); + tcp_rx = kthread_create(stub_rx_loop, &sdev->ud, "stub_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(stub_tx_loop, &sdev->ud, "stub_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } - sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, &sdev->ud, - "stub_rx"); - sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, &sdev->ud, - "stub_tx"); + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); + /* lock and update sdev->ud state */ spin_lock_irq(&sdev->ud.lock); + sdev->ud.tcp_socket = socket; + sdev->ud.sockfd = sockfd; + sdev->ud.tcp_rx = tcp_rx; + sdev->ud.tcp_tx = tcp_tx; sdev->ud.status = SDEV_ST_USED; spin_unlock_irq(&sdev->ud.lock); + wake_up_process(sdev->ud.tcp_rx); + wake_up_process(sdev->ud.tcp_tx); + } else { dev_info(dev, "stub down\n"); @@ -100,6 +126,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a return count; +sock_err: + sockfd_put(socket); err: spin_unlock_irq(&sdev->ud.lock); return -EINVAL; diff --git a/drivers/usb/usbip/vhci_sysfs.c b/drivers/usb/usbip/vhci_sysfs.c index be37aec250c2..e64ea314930b 100644 --- a/drivers/usb/usbip/vhci_sysfs.c +++ b/drivers/usb/usbip/vhci_sysfs.c @@ -312,6 +312,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, struct vhci *vhci; int err; unsigned long flags; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; /* * @rhport: port number of vhci_hcd @@ -349,12 +351,35 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, /* Extract socket from fd. */ socket = sockfd_lookup(sockfd, &err); - if (!socket) + if (!socket) { + dev_err(dev, "failed to lookup sock"); return -EINVAL; + } + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + sockfd_put(socket); + return -EINVAL; + } + + /* create threads before locking */ + tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } - /* now need lock until setting vdev status as used */ + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); - /* begin a lock */ + /* now begin lock until setting vdev status set */ spin_lock_irqsave(&vhci->lock, flags); spin_lock(&vdev->ud.lock); @@ -364,6 +389,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, spin_unlock_irqrestore(&vhci->lock, flags); sockfd_put(socket); + kthread_stop_put(tcp_rx); + kthread_stop_put(tcp_tx); dev_err(dev, "port %d already used\n", rhport); /* @@ -382,14 +409,16 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr, vdev->speed = speed; vdev->ud.sockfd = sockfd; vdev->ud.tcp_socket = socket; + vdev->ud.tcp_rx = tcp_rx; + vdev->ud.tcp_tx = tcp_tx; vdev->ud.status = VDEV_ST_NOTASSIGNED; spin_unlock(&vdev->ud.lock); spin_unlock_irqrestore(&vhci->lock, flags); /* end the lock */ - vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx"); - vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx"); + wake_up_process(vdev->ud.tcp_rx); + wake_up_process(vdev->ud.tcp_tx); rh_port_connect(vdev, speed); diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c index 100f680c572a..7383a543c6d1 100644 --- a/drivers/usb/usbip/vudc_sysfs.c +++ b/drivers/usb/usbip/vudc_sysfs.c @@ -90,8 +90,9 @@ static ssize_t dev_desc_read(struct file *file, struct kobject *kobj, } static BIN_ATTR_RO(dev_desc, sizeof(struct usb_device_descriptor)); -static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *attr, - const char *in, size_t count) +static ssize_t usbip_sockfd_store(struct device *dev, + struct device_attribute *attr, + const char *in, size_t count) { struct vudc *udc = (struct vudc *) dev_get_drvdata(dev); int rv; @@ -100,6 +101,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a struct socket *socket; unsigned long flags; int ret; + struct task_struct *tcp_rx = NULL; + struct task_struct *tcp_tx = NULL; rv = kstrtoint(in, 0, &sockfd); if (rv != 0) @@ -138,24 +141,54 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a goto unlock_ud; } - udc->ud.tcp_socket = socket; + if (socket->type != SOCK_STREAM) { + dev_err(dev, "Expecting SOCK_STREAM - found %d", + socket->type); + ret = -EINVAL; + goto sock_err; + } + /* unlock and create threads and get tasks */ spin_unlock_irq(&udc->ud.lock); spin_unlock_irqrestore(&udc->lock, flags); - udc->ud.tcp_rx = kthread_get_run(&v_rx_loop, - &udc->ud, "vudc_rx"); - udc->ud.tcp_tx = kthread_get_run(&v_tx_loop, - &udc->ud, "vudc_tx"); + tcp_rx = kthread_create(&v_rx_loop, &udc->ud, "vudc_rx"); + if (IS_ERR(tcp_rx)) { + sockfd_put(socket); + return -EINVAL; + } + tcp_tx = kthread_create(&v_tx_loop, &udc->ud, "vudc_tx"); + if (IS_ERR(tcp_tx)) { + kthread_stop(tcp_rx); + sockfd_put(socket); + return -EINVAL; + } + + /* get task structs now */ + get_task_struct(tcp_rx); + get_task_struct(tcp_tx); + /* lock and update udc->ud state */ spin_lock_irqsave(&udc->lock, flags); spin_lock_irq(&udc->ud.lock); + + udc->ud.tcp_socket = socket; + udc->ud.tcp_rx = tcp_rx; + udc->ud.tcp_tx = tcp_tx; udc->ud.status = SDEV_ST_USED; + spin_unlock_irq(&udc->ud.lock); ktime_get_ts64(&udc->start_time); v_start_timer(udc); udc->connected = 1; + + spin_unlock_irqrestore(&udc->lock, flags); + + wake_up_process(udc->ud.tcp_rx); + wake_up_process(udc->ud.tcp_tx); + return count; + } else { if (!udc->connected) { dev_err(dev, "Device not connected"); @@ -177,6 +210,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a return count; +sock_err: + sockfd_put(socket); unlock_ud: spin_unlock_irq(&udc->ud.lock); unlock: diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index b5fe6d2ad22f..25fd971be63f 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -1820,7 +1820,7 @@ static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); - if (offset + len < sizeof(struct virtio_net_config)) + if (offset + len <= sizeof(struct virtio_net_config)) memcpy(buf, (u8 *)&ndev->config + offset, len); } diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 5533df91b257..90c0525b1e0c 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -21,7 +21,7 @@ config VFIO_VIRQFD menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" - depends on IOMMU_API + select IOMMU_API select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64) help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 229685634031..1bb7edac5689 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -74,6 +74,8 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, int ret; cap = kmalloc(cap_size, GFP_KERNEL); + if (!cap) + return -ENOMEM; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL; cap->header.version = 1; @@ -98,6 +100,8 @@ static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, int ret; cap = kmalloc(cap_size, GFP_KERNEL); + if (!cap) + return -ENOMEM; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP; cap->header.version = 1; diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0b4dedaa9128..3dca6aab0f1f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -236,6 +237,18 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize) } } +static void vfio_iommu_populate_bitmap_full(struct vfio_iommu *iommu) +{ + struct rb_node *n; + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + + bitmap_set(dma->bitmap, 0, dma->size >> pgshift); + } +} + static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) { struct rb_node *n; @@ -419,9 +432,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, bool write_fault) { + pte_t *ptep; + spinlock_t *ptl; int ret; - ret = follow_pfn(vma, vaddr, pfn); + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); if (ret) { bool unlocked = false; @@ -435,9 +450,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, if (ret) return ret; - ret = follow_pfn(vma, vaddr, pfn); + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); + if (ret) + return ret; } + if (write_fault && !pte_write(*ptep)) + ret = -EFAULT; + else + *pfn = pte_pfn(*ptep); + + pte_unmap_unlock(ptep, ptl); return ret; } @@ -945,6 +968,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) { + WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)); vfio_unmap_unpin(iommu, dma, true); vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); @@ -2238,23 +2262,6 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) } } -static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu) -{ - struct rb_node *n; - - n = rb_first(&iommu->dma_list); - for (; n; n = rb_next(n)) { - struct vfio_dma *dma; - - dma = rb_entry(n, struct vfio_dma, node); - - if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list))) - break; - } - /* mdev vendor driver must unregister notifier */ - WARN_ON(iommu->notifier.head); -} - /* * Called when a domain is removed in detach. It is possible that * the removed domain decided the iova aperture window. Modify the @@ -2354,10 +2361,10 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, kfree(group); if (list_empty(&iommu->external_domain->group_list)) { - vfio_sanity_check_pfn_list(iommu); - - if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) + if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) { + WARN_ON(iommu->notifier.head); vfio_iommu_unmap_unpin_all(iommu); + } kfree(iommu->external_domain); iommu->external_domain = NULL; @@ -2391,10 +2398,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, */ if (list_empty(&domain->group_list)) { if (list_is_singular(&iommu->domain_list)) { - if (!iommu->external_domain) + if (!iommu->external_domain) { + WARN_ON(iommu->notifier.head); vfio_iommu_unmap_unpin_all(iommu); - else + } else { vfio_iommu_unmap_unpin_reaccount(iommu); + } } iommu_domain_free(domain->domain); list_del(&domain->next); @@ -2415,8 +2424,11 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, * Removal of a group without dirty tracking may allow the iommu scope * to be promoted. */ - if (update_dirty_scope) + if (update_dirty_scope) { update_pinned_page_dirty_scope(iommu); + if (iommu->dirty_page_tracking) + vfio_iommu_populate_bitmap_full(iommu); + } mutex_unlock(&iommu->lock); } @@ -2475,7 +2487,6 @@ static void vfio_iommu_type1_release(void *iommu_data) if (iommu->external_domain) { vfio_release_domain(iommu->external_domain, true); - vfio_sanity_check_pfn_list(iommu); kfree(iommu->external_domain); } diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index ef688c8c0e0e..e0a27e336293 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -308,8 +308,10 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp) static void vhost_vdpa_config_put(struct vhost_vdpa *v) { - if (v->config_ctx) + if (v->config_ctx) { eventfd_ctx_put(v->config_ctx); + v->config_ctx = NULL; + } } static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) @@ -329,8 +331,12 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) if (!IS_ERR_OR_NULL(ctx)) eventfd_ctx_put(ctx); - if (IS_ERR(v->config_ctx)) - return PTR_ERR(v->config_ctx); + if (IS_ERR(v->config_ctx)) { + long ret = PTR_ERR(v->config_ctx); + + v->config_ctx = NULL; + return ret; + } v->vdpa->config->set_config_cb(v->vdpa, &cb); @@ -900,14 +906,10 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) static void vhost_vdpa_clean_irq(struct vhost_vdpa *v) { - struct vhost_virtqueue *vq; int i; - for (i = 0; i < v->nvqs; i++) { - vq = &v->vqs[i]; - if (vq->call_ctx.producer.irq) - irq_bypass_unregister_producer(&vq->call_ctx.producer); - } + for (i = 0; i < v->nvqs; i++) + vhost_vdpa_unsetup_vq_irq(v, i); } static int vhost_vdpa_release(struct inode *inode, struct file *filep) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index cfb7f5612ef0..4f02db65dede 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -1269,6 +1269,7 @@ config FB_ATY select FB_CFB_IMAGEBLIT select FB_BACKLIGHT if FB_ATY_BACKLIGHT select FB_MACMODES if PPC + select FB_ATY_CT if SPARC64 && PCI help This driver supports graphics boards with the ATI Mach64 chips. Say Y if you have such a graphics board. @@ -1279,7 +1280,6 @@ config FB_ATY config FB_ATY_CT bool "Mach64 CT/VT/GT/LT (incl. 3D RAGE) support" depends on PCI && FB_ATY - default y if SPARC64 && PCI help Say Y here to support use of ATI's 64-bit Rage boards (or other boards based on the Mach64 CT, VT, GT, and LT chipsets) as a diff --git a/drivers/video/fbdev/udlfb.c b/drivers/video/fbdev/udlfb.c index f9b3c1cb9530..b9cdd02c1000 100644 --- a/drivers/video/fbdev/udlfb.c +++ b/drivers/video/fbdev/udlfb.c @@ -1017,6 +1017,7 @@ static void dlfb_ops_destroy(struct fb_info *info) } vfree(dlfb->backing_buffer); kfree(dlfb->edid); + dlfb_free_urb_list(dlfb); usb_put_dev(dlfb->udev); kfree(dlfb); diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index ea05af41ec69..8d195e3f8301 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -468,7 +468,7 @@ static int hgcm_cancel_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call) * Cancellation fun. */ static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call, - u32 timeout_ms, bool *leak_it) + u32 timeout_ms, bool interruptible, bool *leak_it) { int rc, cancel_rc, ret; long timeout; @@ -495,10 +495,15 @@ static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call, else timeout = msecs_to_jiffies(timeout_ms); - timeout = wait_event_interruptible_timeout( - gdev->hgcm_wq, - hgcm_req_done(gdev, &call->header), - timeout); + if (interruptible) { + timeout = wait_event_interruptible_timeout(gdev->hgcm_wq, + hgcm_req_done(gdev, &call->header), + timeout); + } else { + timeout = wait_event_timeout(gdev->hgcm_wq, + hgcm_req_done(gdev, &call->header), + timeout); + } /* timeout > 0 means hgcm_req_done has returned true, so success */ if (timeout > 0) @@ -631,7 +636,8 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id, hgcm_call_init_call(call, client_id, function, parms, parm_count, bounce_bufs); - ret = vbg_hgcm_do_call(gdev, call, timeout_ms, &leak_it); + ret = vbg_hgcm_do_call(gdev, call, timeout_ms, + requestor & VMMDEV_REQUESTOR_USERMODE, &leak_it); if (ret == 0) { *vbox_status = call->header.result; ret = hgcm_call_copy_back_result(call, parms, parm_count, diff --git a/drivers/w1/slaves/w1_therm.c b/drivers/w1/slaves/w1_therm.c index 3712b1e6dc71..976eea28f268 100644 --- a/drivers/w1/slaves/w1_therm.c +++ b/drivers/w1/slaves/w1_therm.c @@ -667,28 +667,24 @@ static inline int w1_DS18B20_get_resolution(struct w1_slave *sl) */ static inline int w1_DS18B20_convert_temp(u8 rom[9]) { - int t; - u32 bv; + u16 bv; + s16 t; + + /* Signed 16-bit value to unsigned, cpu order */ + bv = le16_to_cpup((__le16 *)rom); /* Config register bit R2 = 1 - GX20MH01 in 13 or 14 bit resolution mode */ if (rom[4] & 0x80) { - /* Signed 16-bit value to unsigned, cpu order */ - bv = le16_to_cpup((__le16 *)rom); - /* Insert two temperature bits from config register */ /* Avoid arithmetic shift of signed value */ bv = (bv << 2) | (rom[4] & 3); - - t = (int) sign_extend32(bv, 17); /* Degrees, lowest bit is 2^-6 */ - return (t*1000)/64; /* Millidegrees */ + t = (s16) bv; /* Degrees, lowest bit is 2^-6 */ + return (int)t * 1000 / 64; /* Sign-extend to int; millidegrees */ } - - t = (int)le16_to_cpup((__le16 *)rom); - return t*1000/16; + t = (s16)bv; /* Degrees, lowest bit is 2^-4 */ + return (int)t * 1000 / 16; /* Sign-extend to int; millidegrees */ } - - /** * w1_DS18S20_convert_temp() - temperature computation for DS18S20 * @rom: data read from device RAM (8 data bytes + 1 CRC byte) diff --git a/drivers/watchdog/intel-mid_wdt.c b/drivers/watchdog/intel-mid_wdt.c index 1ae03b64ef8b..9b2173f765c8 100644 --- a/drivers/watchdog/intel-mid_wdt.c +++ b/drivers/watchdog/intel-mid_wdt.c @@ -154,6 +154,10 @@ static int mid_wdt_probe(struct platform_device *pdev) watchdog_set_nowayout(wdt_dev, WATCHDOG_NOWAYOUT); watchdog_set_drvdata(wdt_dev, mid); + mid->scu = devm_intel_scu_ipc_dev_get(dev); + if (!mid->scu) + return -EPROBE_DEFER; + ret = devm_request_irq(dev, pdata->irq, mid_wdt_irq, IRQF_SHARED | IRQF_NO_SUSPEND, "watchdog", wdt_dev); @@ -162,10 +166,6 @@ static int mid_wdt_probe(struct platform_device *pdev) return ret; } - mid->scu = devm_intel_scu_ipc_dev_get(dev); - if (!mid->scu) - return -EPROBE_DEFER; - /* * The firmware followed by U-Boot leaves the watchdog running * with the default threshold which may vary. When we get here diff --git a/drivers/watchdog/mei_wdt.c b/drivers/watchdog/mei_wdt.c index 5391bf3e6b11..c5967d8b4256 100644 --- a/drivers/watchdog/mei_wdt.c +++ b/drivers/watchdog/mei_wdt.c @@ -382,6 +382,7 @@ static int mei_wdt_register(struct mei_wdt *wdt) watchdog_set_drvdata(&wdt->wdd, wdt); watchdog_stop_on_reboot(&wdt->wdd); + watchdog_stop_on_unregister(&wdt->wdd); ret = watchdog_register_device(&wdt->wdd); if (ret) diff --git a/drivers/watchdog/qcom-wdt.c b/drivers/watchdog/qcom-wdt.c index 7cf0f2ec649b..e38a87ffe5f5 100644 --- a/drivers/watchdog/qcom-wdt.c +++ b/drivers/watchdog/qcom-wdt.c @@ -22,7 +22,6 @@ enum wdt_reg { }; #define QCOM_WDT_ENABLE BIT(0) -#define QCOM_WDT_ENABLE_IRQ BIT(1) static const u32 reg_offset_data_apcs_tmr[] = { [WDT_RST] = 0x38, @@ -63,16 +62,6 @@ struct qcom_wdt *to_qcom_wdt(struct watchdog_device *wdd) return container_of(wdd, struct qcom_wdt, wdd); } -static inline int qcom_get_enable(struct watchdog_device *wdd) -{ - int enable = QCOM_WDT_ENABLE; - - if (wdd->pretimeout) - enable |= QCOM_WDT_ENABLE_IRQ; - - return enable; -} - static irqreturn_t qcom_wdt_isr(int irq, void *arg) { struct watchdog_device *wdd = arg; @@ -91,7 +80,7 @@ static int qcom_wdt_start(struct watchdog_device *wdd) writel(1, wdt_addr(wdt, WDT_RST)); writel(bark * wdt->rate, wdt_addr(wdt, WDT_BARK_TIME)); writel(wdd->timeout * wdt->rate, wdt_addr(wdt, WDT_BITE_TIME)); - writel(qcom_get_enable(wdd), wdt_addr(wdt, WDT_EN)); + writel(QCOM_WDT_ENABLE, wdt_addr(wdt, WDT_EN)); return 0; } diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index 41645fe6ad48..ea0efd290c37 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -50,11 +50,11 @@ config XEN_BALLOON_MEMORY_HOTPLUG SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'" -config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT +config XEN_MEMORY_HOTPLUG_LIMIT int "Hotplugged memory limit (in GiB) for a PV guest" default 512 depends on XEN_HAVE_PVMMU - depends on XEN_BALLOON_MEMORY_HOTPLUG + depends on MEMORY_HOTPLUG help Maxmium amount of memory (in GiB) that a PV guest can be expanded to when using memory hotplug. diff --git a/drivers/xen/events/events_2l.c b/drivers/xen/events/events_2l.c index da87f3a1e351..b8f2f971c2f0 100644 --- a/drivers/xen/events/events_2l.c +++ b/drivers/xen/events/events_2l.c @@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void) return EVTCHN_2L_NR_CHANNELS; } +static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu) +{ + clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu))); +} + static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu) { @@ -72,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_port_t port) return sync_test_bit(port, BM(&s->evtchn_pending[0])); } -static bool evtchn_2l_test_and_set_mask(evtchn_port_t port) -{ - struct shared_info *s = HYPERVISOR_shared_info; - return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0])); -} - static void evtchn_2l_mask(evtchn_port_t port) { struct shared_info *s = HYPERVISOR_shared_info; @@ -355,18 +354,27 @@ static void evtchn_2l_resume(void) EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); } +static int evtchn_2l_percpu_deinit(unsigned int cpu) +{ + memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) * + EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD); + + return 0; +} + static const struct evtchn_ops evtchn_ops_2l = { .max_channels = evtchn_2l_max_channels, .nr_channels = evtchn_2l_max_channels, + .remove = evtchn_2l_remove, .bind_to_cpu = evtchn_2l_bind_to_cpu, .clear_pending = evtchn_2l_clear_pending, .set_pending = evtchn_2l_set_pending, .is_pending = evtchn_2l_is_pending, - .test_and_set_mask = evtchn_2l_test_and_set_mask, .mask = evtchn_2l_mask, .unmask = evtchn_2l_unmask, .handle_events = evtchn_2l_handle_events, .resume = evtchn_2l_resume, + .percpu_deinit = evtchn_2l_percpu_deinit, }; void __init xen_evtchn_2l_init(void) diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index e850f79351cb..d9148609bd09 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -97,13 +97,19 @@ struct irq_info { short refcnt; u8 spurious_cnt; u8 is_accounted; - enum xen_irq_type type; /* type */ + short type; /* type: IRQT_* */ + u8 mask_reason; /* Why is event channel masked */ +#define EVT_MASK_REASON_EXPLICIT 0x01 +#define EVT_MASK_REASON_TEMPORARY 0x02 +#define EVT_MASK_REASON_EOI_PENDING 0x04 + u8 is_active; /* Is event just being handled? */ unsigned irq; evtchn_port_t evtchn; /* event channel */ unsigned short cpu; /* cpu bound */ unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */ unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */ u64 eoi_time; /* Time in jiffies when to EOI. */ + spinlock_t lock; union { unsigned short virq; @@ -152,6 +158,7 @@ static DEFINE_RWLOCK(evtchn_rwlock); * evtchn_rwlock * IRQ-desc lock * percpu eoi_list_lock + * irq_info->lock */ static LIST_HEAD(xen_irq_list_head); @@ -302,6 +309,8 @@ static int xen_irq_info_common_setup(struct irq_info *info, info->irq = irq; info->evtchn = evtchn; info->cpu = cpu; + info->mask_reason = EVT_MASK_REASON_EXPLICIT; + spin_lock_init(&info->lock); ret = set_evtchn_to_irq(evtchn, irq); if (ret < 0) @@ -368,6 +377,7 @@ static int xen_irq_info_pirq_setup(unsigned irq, static void xen_irq_info_cleanup(struct irq_info *info) { set_evtchn_to_irq(info->evtchn, -1); + xen_evtchn_port_remove(info->evtchn, info->cpu); info->evtchn = 0; channels_on_cpu_dec(info); } @@ -449,6 +459,34 @@ unsigned int cpu_from_evtchn(evtchn_port_t evtchn) return ret; } +static void do_mask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock, flags); + + if (!info->mask_reason) + mask_evtchn(info->evtchn); + + info->mask_reason |= reason; + + spin_unlock_irqrestore(&info->lock, flags); +} + +static void do_unmask(struct irq_info *info, u8 reason) +{ + unsigned long flags; + + spin_lock_irqsave(&info->lock, flags); + + info->mask_reason &= ~reason; + + if (!info->mask_reason) + unmask_evtchn(info->evtchn); + + spin_unlock_irqrestore(&info->lock, flags); +} + #ifdef CONFIG_X86 static bool pirq_check_eoi_map(unsigned irq) { @@ -585,7 +623,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious) } info->eoi_time = 0; - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EOI_PENDING); } static void xen_irq_lateeoi_worker(struct work_struct *work) @@ -754,6 +792,12 @@ static void xen_evtchn_close(evtchn_port_t port) BUG(); } +static void event_handler_exit(struct irq_info *info) +{ + smp_store_release(&info->is_active, 0); + clear_evtchn(info->evtchn); +} + static void pirq_query_unmask(int irq) { struct physdev_irq_status_query irq_status; @@ -772,14 +816,15 @@ static void pirq_query_unmask(int irq) static void eoi_pirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) }; int rc = 0; if (!VALID_EVTCHN(evtchn)) return; - clear_evtchn(evtchn); + event_handler_exit(info); if (pirq_needs_eoi(data->irq)) { rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi); @@ -830,7 +875,8 @@ static unsigned int __startup_pirq(unsigned int irq) goto err; out: - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); + eoi_pirq(irq_get_irq_data(irq)); return 0; @@ -857,7 +903,7 @@ static void shutdown_pirq(struct irq_data *data) if (!VALID_EVTCHN(evtchn)) return; - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); xen_evtchn_close(evtchn); xen_irq_info_cleanup(info); } @@ -1602,6 +1648,8 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl) } info = info_for_irq(irq); + if (xchg_acquire(&info->is_active, 1)) + return; if (ctrl->defer_eoi) { info->eoi_cpu = smp_processor_id(); @@ -1690,10 +1738,10 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq) } /* Rebind an evtchn so that it gets delivered to a specific cpu */ -static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) +static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu) { struct evtchn_bind_vcpu bind_vcpu; - int masked; + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return -1; @@ -1709,7 +1757,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) * Mask the event while changing the VCPU binding to prevent * it being delivered on an unexpected VCPU. */ - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); /* * If this fails, it usually just indicates that we're dealing with a @@ -1719,8 +1767,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu) if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0) bind_evtchn_to_cpu(evtchn, tcpu, false); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 0; } @@ -1759,7 +1806,7 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, unsigned int tcpu = select_target_cpu(dest); int ret; - ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu); + ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu); if (!ret) irq_data_update_effective_affinity(data, cpumask_of(tcpu)); @@ -1768,28 +1815,29 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest, static void enable_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_EXPLICIT); } static void disable_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - mask_evtchn(evtchn); + do_mask(info, EVT_MASK_REASON_EXPLICIT); } static void ack_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; - if (!VALID_EVTCHN(evtchn)) - return; - - clear_evtchn(evtchn); + if (VALID_EVTCHN(evtchn)) + event_handler_exit(info); } static void mask_ack_dynirq(struct irq_data *data) @@ -1798,18 +1846,39 @@ static void mask_ack_dynirq(struct irq_data *data) ack_dynirq(data); } +static void lateeoi_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EOI_PENDING); + event_handler_exit(info); + } +} + +static void lateeoi_mask_ack_dynirq(struct irq_data *data) +{ + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; + + if (VALID_EVTCHN(evtchn)) { + do_mask(info, EVT_MASK_REASON_EXPLICIT); + event_handler_exit(info); + } +} + static int retrigger_dynirq(struct irq_data *data) { - evtchn_port_t evtchn = evtchn_from_irq(data->irq); - int masked; + struct irq_info *info = info_for_irq(data->irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (!VALID_EVTCHN(evtchn)) return 0; - masked = test_and_set_mask(evtchn); + do_mask(info, EVT_MASK_REASON_TEMPORARY); set_evtchn(evtchn); - if (!masked) - unmask_evtchn(evtchn); + do_unmask(info, EVT_MASK_REASON_TEMPORARY); return 1; } @@ -1908,10 +1977,11 @@ static void restore_cpu_ipis(unsigned int cpu) /* Clear an irq's pending state, in preparation for polling on it */ void xen_clear_irq_pending(int irq) { - evtchn_port_t evtchn = evtchn_from_irq(irq); + struct irq_info *info = info_for_irq(irq); + evtchn_port_t evtchn = info ? info->evtchn : 0; if (VALID_EVTCHN(evtchn)) - clear_evtchn(evtchn); + event_handler_exit(info); } EXPORT_SYMBOL(xen_clear_irq_pending); void xen_set_irq_pending(int irq) @@ -2023,8 +2093,8 @@ static struct irq_chip xen_lateeoi_chip __read_mostly = { .irq_mask = disable_dynirq, .irq_unmask = enable_dynirq, - .irq_ack = mask_ack_dynirq, - .irq_mask_ack = mask_ack_dynirq, + .irq_ack = lateeoi_ack_dynirq, + .irq_mask_ack = lateeoi_mask_ack_dynirq, .irq_set_affinity = set_affinity_irq, .irq_retrigger = retrigger_dynirq, diff --git a/drivers/xen/events/events_fifo.c b/drivers/xen/events/events_fifo.c index b234f1766810..ad9fe51d3fb3 100644 --- a/drivers/xen/events/events_fifo.c +++ b/drivers/xen/events/events_fifo.c @@ -209,12 +209,6 @@ static bool evtchn_fifo_is_pending(evtchn_port_t port) return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word)); } -static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port) -{ - event_word_t *word = event_word_from_port(port); - return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word)); -} - static void evtchn_fifo_mask(evtchn_port_t port) { event_word_t *word = event_word_from_port(port); @@ -423,7 +417,6 @@ static const struct evtchn_ops evtchn_ops_fifo = { .clear_pending = evtchn_fifo_clear_pending, .set_pending = evtchn_fifo_set_pending, .is_pending = evtchn_fifo_is_pending, - .test_and_set_mask = evtchn_fifo_test_and_set_mask, .mask = evtchn_fifo_mask, .unmask = evtchn_fifo_unmask, .handle_events = evtchn_fifo_handle_events, diff --git a/drivers/xen/events/events_internal.h b/drivers/xen/events/events_internal.h index 0a97c0549db7..4d3398eff9cd 100644 --- a/drivers/xen/events/events_internal.h +++ b/drivers/xen/events/events_internal.h @@ -14,13 +14,13 @@ struct evtchn_ops { unsigned (*nr_channels)(void); int (*setup)(evtchn_port_t port); + void (*remove)(evtchn_port_t port, unsigned int cpu); void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu); void (*clear_pending)(evtchn_port_t port); void (*set_pending)(evtchn_port_t port); bool (*is_pending)(evtchn_port_t port); - bool (*test_and_set_mask)(evtchn_port_t port); void (*mask)(evtchn_port_t port); void (*unmask)(evtchn_port_t port); @@ -54,6 +54,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn) return 0; } +static inline void xen_evtchn_port_remove(evtchn_port_t evtchn, + unsigned int cpu) +{ + if (evtchn_ops->remove) + evtchn_ops->remove(evtchn, cpu); +} + static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu, unsigned int old_cpu) @@ -76,11 +83,6 @@ static inline bool test_evtchn(evtchn_port_t port) return evtchn_ops->is_pending(port); } -static inline bool test_and_set_mask(evtchn_port_t port) -{ - return evtchn_ops->test_and_set_mask(port); -} - static inline void mask_evtchn(evtchn_port_t port) { return evtchn_ops->mask(port); diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c index a36b71286bcf..5447c5156b2e 100644 --- a/drivers/xen/gntdev.c +++ b/drivers/xen/gntdev.c @@ -309,44 +309,47 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map) * to the kernel linear addresses of the struct pages. * These ptes are completely different from the user ptes dealt * with find_grant_ptes. + * Note that GNTMAP_device_map isn't needed here: The + * dev_bus_addr output field gets consumed only from ->map_ops, + * and by not requesting it when mapping we also avoid needing + * to mirror dev_bus_addr into ->unmap_ops (and holding an extra + * reference to the page in the hypervisor). */ + unsigned int flags = (map->flags & ~GNTMAP_device_map) | + GNTMAP_host_map; + for (i = 0; i < map->count; i++) { unsigned long address = (unsigned long) pfn_to_kaddr(page_to_pfn(map->pages[i])); BUG_ON(PageHighMem(map->pages[i])); - gnttab_set_map_op(&map->kmap_ops[i], address, - map->flags | GNTMAP_host_map, + gnttab_set_map_op(&map->kmap_ops[i], address, flags, map->grants[i].ref, map->grants[i].domid); gnttab_set_unmap_op(&map->kunmap_ops[i], address, - map->flags | GNTMAP_host_map, -1); + flags, -1); } } pr_debug("map %d+%d\n", map->index, map->count); err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL, map->pages, map->count); - if (err) - return err; for (i = 0; i < map->count; i++) { - if (map->map_ops[i].status) { + if (map->map_ops[i].status == GNTST_okay) + map->unmap_ops[i].handle = map->map_ops[i].handle; + else if (!err) err = -EINVAL; - continue; - } - map->unmap_ops[i].handle = map->map_ops[i].handle; - if (use_ptemod) - map->kunmap_ops[i].handle = map->kmap_ops[i].handle; -#ifdef CONFIG_XEN_GRANT_DMA_ALLOC - else if (map->dma_vaddr) { - unsigned long bfn; + if (map->flags & GNTMAP_device_map) + map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr; - bfn = pfn_to_bfn(page_to_pfn(map->pages[i])); - map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn); + if (use_ptemod) { + if (map->kmap_ops[i].status == GNTST_okay) + map->kunmap_ops[i].handle = map->kmap_ops[i].handle; + else if (!err) + err = -EINVAL; } -#endif } return err; } diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c index 862162dca33c..9cd4fe8ce680 100644 --- a/drivers/xen/xen-scsiback.c +++ b/drivers/xen/xen-scsiback.c @@ -386,12 +386,12 @@ static int scsiback_gnttab_data_map_batch(struct gnttab_map_grant_ref *map, return 0; err = gnttab_map_refs(map, NULL, pg, cnt); - BUG_ON(err); for (i = 0; i < cnt; i++) { if (unlikely(map[i].status != GNTST_okay)) { pr_err("invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; - err = -ENOMEM; + if (!err) + err = -ENOMEM; } else { get_page(pg[i]); } diff --git a/fs/Kconfig b/fs/Kconfig index da524c4d7b7e..0bbad356ab57 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -145,6 +145,7 @@ menu "DOS/FAT/EXFAT/NT Filesystems" source "fs/fat/Kconfig" source "fs/exfat/Kconfig" source "fs/ntfs/Kconfig" +source "fs/ntfs3/Kconfig" endmenu endif # BLOCK diff --git a/fs/Makefile b/fs/Makefile index 999d1a23f036..4f5242cdaee2 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -100,6 +100,7 @@ obj-$(CONFIG_SYSV_FS) += sysv/ obj-$(CONFIG_CIFS) += cifs/ obj-$(CONFIG_HPFS_FS) += hpfs/ obj-$(CONFIG_NTFS_FS) += ntfs/ +obj-$(CONFIG_NTFS3_FS) += ntfs3/ obj-$(CONFIG_UFS_FS) += ufs/ obj-$(CONFIG_EFS_FS) += efs/ obj-$(CONFIG_JFFS2_FS) += jffs2/ diff --git a/fs/affs/namei.c b/fs/affs/namei.c index 41c5749f4db7..5400a876d73f 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c @@ -460,8 +460,10 @@ affs_xrename(struct inode *old_dir, struct dentry *old_dentry, return -EIO; bh_new = affs_bread(sb, d_inode(new_dentry)->i_ino); - if (!bh_new) + if (!bh_new) { + affs_brelse(bh_old); return -EIO; + } /* Remove old header from its parent directory. */ affs_lock_dir(old_dir); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 7bd659ad959e..7cb0604e2841 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -69,7 +69,6 @@ const struct inode_operations afs_dir_inode_operations = { .permission = afs_permission, .getattr = afs_getattr, .setattr = afs_setattr, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_dir_aops = { diff --git a/fs/afs/file.c b/fs/afs/file.c index 85f5adf21aa0..960b64268623 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -43,7 +43,6 @@ const struct inode_operations afs_file_inode_operations = { .getattr = afs_getattr, .setattr = afs_setattr, .permission = afs_permission, - .listxattr = afs_listxattr, }; const struct address_space_operations afs_fs_aops = { diff --git a/fs/afs/fs_operation.c b/fs/afs/fs_operation.c index 97cab12b0a6c..71c58723763d 100644 --- a/fs/afs/fs_operation.c +++ b/fs/afs/fs_operation.c @@ -181,10 +181,13 @@ void afs_wait_for_operation(struct afs_operation *op) if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) && op->ops->issue_yfs_rpc) op->ops->issue_yfs_rpc(op); - else + else if (op->ops->issue_afs_rpc) op->ops->issue_afs_rpc(op); + else + op->ac.error = -ENOTSUPP; - op->error = afs_wait_for_call_to_complete(op->call, &op->ac); + if (op->call) + op->error = afs_wait_for_call_to_complete(op->call, &op->ac); } switch (op->error) { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index b0d7b892090d..1d03eb1920ec 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -27,7 +27,6 @@ static const struct inode_operations afs_symlink_inode_operations = { .get_link = page_get_link, - .listxattr = afs_listxattr, }; static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode) diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 0d150a29e39e..525ef075fcd9 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -1508,7 +1508,6 @@ extern int afs_launder_page(struct page *); * xattr.c */ extern const struct xattr_handler *afs_xattr_handlers[]; -extern ssize_t afs_listxattr(struct dentry *, char *, size_t); /* * yfsclient.c diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 052dab2f5c03..bbb2c210d139 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = { .lookup = afs_mntpt_lookup, .readlink = page_readlink, .getattr = afs_getattr, - .listxattr = afs_listxattr, }; const struct inode_operations afs_autocell_inode_operations = { diff --git a/fs/afs/xattr.c b/fs/afs/xattr.c index 95c573dcda11..6a29337bd562 100644 --- a/fs/afs/xattr.c +++ b/fs/afs/xattr.c @@ -11,29 +11,6 @@ #include #include "internal.h" -static const char afs_xattr_list[] = - "afs.acl\0" - "afs.cell\0" - "afs.fid\0" - "afs.volume\0" - "afs.yfs.acl\0" - "afs.yfs.acl_inherited\0" - "afs.yfs.acl_num_cleaned\0" - "afs.yfs.vol_acl"; - -/* - * Retrieve a list of the supported xattrs. - */ -ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size) -{ - if (size == 0) - return sizeof(afs_xattr_list); - if (size < sizeof(afs_xattr_list)) - return -ERANGE; - memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list)); - return sizeof(afs_xattr_list); -} - /* * Deal with the result of a successful fetch ACL operation. */ @@ -230,6 +207,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler, else ret = -ERANGE; } + } else if (ret == -ENOTSUPP) { + ret = -ENODATA; } error_yacl: @@ -254,6 +233,7 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, { struct afs_operation *op; struct afs_vnode *vnode = AFS_FS_I(inode); + int ret; if (flags == XATTR_CREATE || strcmp(name, "acl") != 0) @@ -268,7 +248,10 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler, return afs_put_operation(op); op->ops = &yfs_store_opaque_acl2_operation; - return afs_do_sync_operation(op); + ret = afs_do_sync_operation(op); + if (ret == -ENOTSUPP) + ret = -ENODATA; + return ret; } static const struct xattr_handler afs_xattr_yfs_handler = { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 3880a82da1dc..11b5bf241955 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -647,12 +647,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, struct super_block *sb = file_inode(file)->i_sb; struct dentry *root = sb->s_root, *dentry; int err = 0; + struct file *f = NULL; e = create_entry(buffer, count); if (IS_ERR(e)) return PTR_ERR(e); + if (e->flags & MISC_FMT_OPEN_FILE) { + f = open_exec(e->interpreter); + if (IS_ERR(f)) { + pr_notice("register: failed to install interpreter file %s\n", + e->interpreter); + kfree(e); + return PTR_ERR(f); + } + e->interp_file = f; + } + inode_lock(d_inode(root)); dentry = lookup_one_len(e->name, root, strlen(e->name)); err = PTR_ERR(dentry); @@ -676,21 +688,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, goto out2; } - if (e->flags & MISC_FMT_OPEN_FILE) { - struct file *f; - - f = open_exec(e->interpreter); - if (IS_ERR(f)) { - err = PTR_ERR(f); - pr_notice("register: failed to install interpreter file %s\n", e->interpreter); - simple_release_fs(&bm_mnt, &entry_count); - iput(inode); - inode = NULL; - goto out2; - } - e->interp_file = f; - } - e->dentry = dget(dentry); inode->i_private = e; inode->i_fop = &bm_entry_operations; @@ -707,6 +704,8 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, inode_unlock(d_inode(root)); if (err) { + if (f) + filp_close(f, NULL); kfree(e); return err; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 235b5042672e..c33151020bcd 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -118,13 +118,22 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode, if (!(mode & FMODE_EXCL)) { int err = bd_prepare_to_claim(bdev, truncate_bdev_range); if (err) - return err; + goto invalidate; } truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend); if (!(mode & FMODE_EXCL)) bd_abort_claiming(bdev, truncate_bdev_range); return 0; + +invalidate: + /* + * Someone else has handle exclusively open. Try invalidating instead. + * The 'end' argument is inclusive so the rounding is safe. + */ + return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping, + lstart >> PAGE_SHIFT, + lend >> PAGE_SHIFT); } EXPORT_SYMBOL(truncate_bdev_range); diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9cadacf3ec27..7ac59a568595 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2541,13 +2541,6 @@ void btrfs_backref_cleanup_node(struct btrfs_backref_cache *cache, list_del(&edge->list[UPPER]); btrfs_backref_free_edge(cache, edge); - if (RB_EMPTY_NODE(&upper->rb_node)) { - BUG_ON(!list_empty(&node->upper)); - btrfs_backref_drop_node(cache, node); - node = upper; - node->lowest = 1; - continue; - } /* * Add the node to leaf node list if no other child block * cached. @@ -2624,7 +2617,7 @@ static int handle_direct_tree_backref(struct btrfs_backref_cache *cache, /* Only reloc backref cache cares about a specific root */ if (cache->is_reloc) { root = find_reloc_root(cache->fs_info, cur->bytenr); - if (WARN_ON(!root)) + if (!root) return -ENOENT; cur->root = root; } else { diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index ff705cc564a9..17abde7f794c 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -296,6 +296,9 @@ static inline void btrfs_backref_free_node(struct btrfs_backref_cache *cache, struct btrfs_backref_node *node) { if (node) { + ASSERT(list_empty(&node->list)); + ASSERT(list_empty(&node->lower)); + ASSERT(node->eb == NULL); cache->nr_nodes--; btrfs_put_root(node->root); kfree(node); @@ -340,11 +343,11 @@ static inline void btrfs_backref_drop_node_buffer( static inline void btrfs_backref_drop_node(struct btrfs_backref_cache *tree, struct btrfs_backref_node *node) { - BUG_ON(!list_empty(&node->upper)); + ASSERT(list_empty(&node->upper)); btrfs_backref_drop_node_buffer(node); - list_del(&node->list); - list_del(&node->lower); + list_del_init(&node->list); + list_del_init(&node->lower); if (!RB_EMPTY_NODE(&node->rb_node)) rb_erase(&node->rb_node, &tree->rb_root); btrfs_backref_free_node(tree, node); diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 48ebc106a606..3cf1b953f523 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1150,6 +1150,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) spin_lock(&sinfo->lock); spin_lock(&cache->lock); + if (cache->swap_extents) { + ret = -ETXTBSY; + goto out; + } + if (cache->ro) { cache->ro++; ret = 0; @@ -1371,9 +1376,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) btrfs_space_info_update_bytes_pinned(fs_info, space_info, -block_group->pinned); space_info->bytes_readonly += block_group->pinned; - percpu_counter_add_batch(&space_info->total_bytes_pinned, - -block_group->pinned, - BTRFS_TOTAL_BYTES_PINNED_BATCH); + __btrfs_mod_total_bytes_pinned(space_info, -block_group->pinned); block_group->pinned = 0; spin_unlock(&block_group->lock); @@ -2255,7 +2258,7 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, } ret = inc_block_group_ro(cache, 0); - if (!do_chunk_alloc) + if (!do_chunk_alloc || ret == -ETXTBSY) goto unlock_out; if (!ret) goto out; @@ -2264,6 +2267,8 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache, if (ret < 0) goto out; ret = inc_block_group_ro(cache, 0); + if (ret == -ETXTBSY) + goto unlock_out; out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags); @@ -2564,8 +2569,10 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) if (!path) { path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + ret = -ENOMEM; + goto out; + } } /* @@ -2659,16 +2666,14 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) btrfs_put_block_group(cache); if (drop_reserve) btrfs_delayed_refs_rsv_release(fs_info, 1); - - if (ret) - break; - /* * Avoid blocking other tasks for too long. It might even save * us from writing caches for block groups that are going to be * removed. */ mutex_unlock(&trans->transaction->cache_write_mutex); + if (ret) + goto out; mutex_lock(&trans->transaction->cache_write_mutex); } mutex_unlock(&trans->transaction->cache_write_mutex); @@ -2692,7 +2697,12 @@ int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans) goto again; } spin_unlock(&cur_trans->dirty_bgs_lock); - } else if (ret < 0) { + } +out: + if (ret < 0) { + spin_lock(&cur_trans->dirty_bgs_lock); + list_splice_init(&dirty, &cur_trans->dirty_bgs); + spin_unlock(&cur_trans->dirty_bgs_lock); btrfs_cleanup_dirty_bgs(cur_trans, fs_info); } @@ -2896,10 +2906,8 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans, spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); - percpu_counter_add_batch( - &cache->space_info->total_bytes_pinned, - num_bytes, - BTRFS_TOTAL_BYTES_PINNED_BATCH); + __btrfs_mod_total_bytes_pinned(cache->space_info, + num_bytes); set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); @@ -3344,6 +3352,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) ASSERT(list_empty(&block_group->io_list)); ASSERT(list_empty(&block_group->bg_list)); ASSERT(refcount_read(&block_group->refs) == 1); + ASSERT(block_group->swap_extents == 0); btrfs_put_block_group(block_group); spin_lock(&info->block_group_cache_lock); @@ -3410,3 +3419,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group) __btrfs_remove_free_space_cache(block_group->free_space_ctl); } } + +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg) +{ + bool ret = true; + + spin_lock(&bg->lock); + if (bg->ro) + ret = false; + else + bg->swap_extents++; + spin_unlock(&bg->lock); + + return ret; +} + +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount) +{ + spin_lock(&bg->lock); + ASSERT(!bg->ro); + ASSERT(bg->swap_extents >= amount); + bg->swap_extents -= amount; + spin_unlock(&bg->lock); +} diff --git a/fs/btrfs/block-group.h b/fs/btrfs/block-group.h index 8f74a96074f7..8a925741dc34 100644 --- a/fs/btrfs/block-group.h +++ b/fs/btrfs/block-group.h @@ -181,6 +181,12 @@ struct btrfs_block_group { */ int needs_free_space; + /* + * Number of extents in this block group used for swap files. + * All accesses protected by the spinlock 'lock'. + */ + int swap_extents; + /* Record locked full stripes for RAID5/6 block group */ struct btrfs_full_stripe_locks_tree full_stripe_locks_root; }; @@ -301,4 +307,7 @@ int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start, u64 physical, u64 **logical, int *naddrs, int *stripe_len); #endif +bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg); +void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount); + #endif /* BTRFS_BLOCK_GROUP_H */ diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cc89b63d65a4..33fe5d839c11 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -221,9 +221,12 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, ret = btrfs_inc_ref(trans, root, cow, 1); else ret = btrfs_inc_ref(trans, root, cow, 0); - - if (ret) + if (ret) { + btrfs_tree_unlock(cow); + free_extent_buffer(cow); + btrfs_abort_transaction(trans, ret); return ret; + } btrfs_mark_buffer_dirty(cow); *cow_ret = cow; @@ -1362,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq) "failed to read tree block %llu from get_old_root", logical); } else { + btrfs_tree_read_lock(old); eb = btrfs_clone_extent_buffer(old); + btrfs_tree_read_unlock(old); free_extent_buffer(old); } } else if (old_root) { diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 4debdbdde2ab..0c8c55a41d7b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -523,6 +523,11 @@ struct btrfs_swapfile_pin { * points to a struct btrfs_device. */ bool is_block_group; + /* + * Only used when 'is_block_group' is true and it is the number of + * extents used by a swapfile for this block group ('ptr' field). + */ + int bg_extent_count; }; bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 70c0340d839c..a09912cf1852 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata( */ if (!src_rsv || (!trans->bytes_reserved && src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) { - ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, + BTRFS_QGROUP_RSV_META_PREALLOC, true); if (ret < 0) return ret; ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes, @@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata( btrfs_ino(inode), num_bytes, 1); } else { - btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize); + btrfs_qgroup_free_meta_prealloc(root, num_bytes); } return ret; } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 353cc2994d10..30883b9a26d8 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -648,12 +648,12 @@ static int insert_delayed_ref(struct btrfs_trans_handle *trans, */ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *existing, - struct btrfs_delayed_ref_head *update, - int *old_ref_mod_ret) + struct btrfs_delayed_ref_head *update) { struct btrfs_delayed_ref_root *delayed_refs = &trans->transaction->delayed_refs; struct btrfs_fs_info *fs_info = trans->fs_info; + u64 flags = btrfs_ref_head_to_space_flags(existing); int old_ref_mod; BUG_ON(existing->is_data != update->is_data); @@ -701,8 +701,6 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, * currently, for refs we just added we know we're a-ok. */ old_ref_mod = existing->total_ref_mod; - if (old_ref_mod_ret) - *old_ref_mod_ret = old_ref_mod; existing->ref_mod += update->ref_mod; existing->total_ref_mod += update->ref_mod; @@ -724,6 +722,27 @@ static noinline void update_existing_head_ref(struct btrfs_trans_handle *trans, trans->delayed_ref_updates += csum_leaves; } } + + /* + * This handles the following conditions: + * + * 1. We had a ref mod of 0 or more and went negative, indicating that + * we may be freeing space, so add our space to the + * total_bytes_pinned counter. + * 2. We were negative and went to 0 or positive, so no longer can say + * that the space would be pinned, decrement our counter from the + * total_bytes_pinned counter. + * 3. We are now at 0 and have ->must_insert_reserved set, which means + * this was a new allocation and then we dropped it, and thus must + * add our space to the total_bytes_pinned counter. + */ + if (existing->total_ref_mod < 0 && old_ref_mod >= 0) + btrfs_mod_total_bytes_pinned(fs_info, flags, existing->num_bytes); + else if (existing->total_ref_mod >= 0 && old_ref_mod < 0) + btrfs_mod_total_bytes_pinned(fs_info, flags, -existing->num_bytes); + else if (existing->total_ref_mod == 0 && existing->must_insert_reserved) + btrfs_mod_total_bytes_pinned(fs_info, flags, existing->num_bytes); + spin_unlock(&existing->lock); } @@ -798,8 +817,7 @@ static noinline struct btrfs_delayed_ref_head * add_delayed_ref_head(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_head *head_ref, struct btrfs_qgroup_extent_record *qrecord, - int action, int *qrecord_inserted_ret, - int *old_ref_mod, int *new_ref_mod) + int action, int *qrecord_inserted_ret) { struct btrfs_delayed_ref_head *existing; struct btrfs_delayed_ref_root *delayed_refs; @@ -821,8 +839,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, existing = htree_insert(&delayed_refs->href_root, &head_ref->href_node); if (existing) { - update_existing_head_ref(trans, existing, head_ref, - old_ref_mod); + update_existing_head_ref(trans, existing, head_ref); /* * we've updated the existing ref, free the newly * allocated ref @@ -830,14 +847,17 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); head_ref = existing; } else { - if (old_ref_mod) - *old_ref_mod = 0; + u64 flags = btrfs_ref_head_to_space_flags(head_ref); + if (head_ref->is_data && head_ref->ref_mod < 0) { delayed_refs->pending_csums += head_ref->num_bytes; trans->delayed_ref_updates += btrfs_csum_bytes_to_leaves(trans->fs_info, head_ref->num_bytes); } + if (head_ref->ref_mod < 0) + btrfs_mod_total_bytes_pinned(trans->fs_info, flags, + head_ref->num_bytes); delayed_refs->num_heads++; delayed_refs->num_heads_ready++; atomic_inc(&delayed_refs->num_entries); @@ -845,8 +865,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans, } if (qrecord_inserted_ret) *qrecord_inserted_ret = qrecord_inserted; - if (new_ref_mod) - *new_ref_mod = head_ref->total_ref_mod; return head_ref; } @@ -909,8 +927,7 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info, */ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, - struct btrfs_delayed_extent_op *extent_op, - int *old_ref_mod, int *new_ref_mod) + struct btrfs_delayed_extent_op *extent_op) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_tree_ref *ref; @@ -977,8 +994,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, * the spin lock */ head_ref = add_delayed_ref_head(trans, head_ref, record, - action, &qrecord_inserted, - old_ref_mod, new_ref_mod); + action, &qrecord_inserted); ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node); spin_unlock(&delayed_refs->lock); @@ -1006,8 +1022,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, */ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, - u64 reserved, int *old_ref_mod, - int *new_ref_mod) + u64 reserved) { struct btrfs_fs_info *fs_info = trans->fs_info; struct btrfs_delayed_data_ref *ref; @@ -1073,8 +1088,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, * the spin lock */ head_ref = add_delayed_ref_head(trans, head_ref, record, - action, &qrecord_inserted, - old_ref_mod, new_ref_mod); + action, &qrecord_inserted); ret = insert_delayed_ref(trans, delayed_refs, head_ref, &ref->node); spin_unlock(&delayed_refs->lock); @@ -1117,7 +1131,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, spin_lock(&delayed_refs->lock); add_delayed_ref_head(trans, head_ref, NULL, BTRFS_UPDATE_DELAYED_HEAD, - NULL, NULL, NULL); + NULL); spin_unlock(&delayed_refs->lock); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 1c977e6d45dc..3ba140468f12 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -326,6 +326,16 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) } } +static inline u64 btrfs_ref_head_to_space_flags( + struct btrfs_delayed_ref_head *head_ref) +{ + if (head_ref->is_data) + return BTRFS_BLOCK_GROUP_DATA; + else if (head_ref->is_system) + return BTRFS_BLOCK_GROUP_SYSTEM; + return BTRFS_BLOCK_GROUP_METADATA; +} + static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *head) { if (refcount_dec_and_test(&head->refs)) @@ -334,12 +344,10 @@ static inline void btrfs_put_delayed_ref_head(struct btrfs_delayed_ref_head *hea int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, - struct btrfs_delayed_extent_op *extent_op, - int *old_ref_mod, int *new_ref_mod); + struct btrfs_delayed_extent_op *extent_op); int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref, - u64 reserved, int *old_ref_mod, - int *new_ref_mod); + u64 reserved); int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, u64 bytenr, u64 num_bytes, struct btrfs_delayed_extent_op *extent_op); diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 324f646d6e5e..02a68b04e43f 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -80,6 +80,9 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) struct btrfs_dev_replace_item *ptr; u64 src_devid; + if (!dev_root) + return 0; + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 07a2b4f69b10..3cb73a0d12d0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2300,8 +2300,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; - btrfs_init_devices_late(fs_info); } + /* Initialize fs_info for all devices in any case */ + btrfs_init_devices_late(fs_info); /* If IGNOREDATACSUMS is set don't bother reading the csum root. */ if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { @@ -2913,6 +2914,21 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + /* + * btrfs_find_orphan_roots() is responsible for finding all the dead + * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load + * them into the fs_info->fs_roots_radix tree. This must be done before + * calling btrfs_orphan_cleanup() on the tree root. If we don't do it + * first, then btrfs_orphan_cleanup() will delete a dead root's orphan + * item before the root's tree is deleted - this means that if we unmount + * or crash before the deletion completes, on the next mount we will not + * delete what remains of the tree because the orphan item does not + * exists anymore, which is what tells us we have a pending deletion. + */ + ret = btrfs_find_orphan_roots(fs_info); + if (ret) + goto out; + ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto out; @@ -2972,7 +2988,6 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } - ret = btrfs_find_orphan_roots(fs_info); out: return ret; } diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0c335dae5af7..6f0c59debc2b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -82,41 +82,6 @@ void btrfs_free_excluded_extents(struct btrfs_block_group *cache) EXTENT_UPTODATE); } -static u64 generic_ref_to_space_flags(struct btrfs_ref *ref) -{ - if (ref->type == BTRFS_REF_METADATA) { - if (ref->tree_ref.root == BTRFS_CHUNK_TREE_OBJECTID) - return BTRFS_BLOCK_GROUP_SYSTEM; - else - return BTRFS_BLOCK_GROUP_METADATA; - } - return BTRFS_BLOCK_GROUP_DATA; -} - -static void add_pinned_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_ref *ref) -{ - struct btrfs_space_info *space_info; - u64 flags = generic_ref_to_space_flags(ref); - - space_info = btrfs_find_space_info(fs_info, flags); - ASSERT(space_info); - percpu_counter_add_batch(&space_info->total_bytes_pinned, ref->len, - BTRFS_TOTAL_BYTES_PINNED_BATCH); -} - -static void sub_pinned_bytes(struct btrfs_fs_info *fs_info, - struct btrfs_ref *ref) -{ - struct btrfs_space_info *space_info; - u64 flags = generic_ref_to_space_flags(ref); - - space_info = btrfs_find_space_info(fs_info, flags); - ASSERT(space_info); - percpu_counter_add_batch(&space_info->total_bytes_pinned, -ref->len, - BTRFS_TOTAL_BYTES_PINNED_BATCH); -} - /* simple helper to search for an existing data extent at a given offset */ int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len) { @@ -1388,7 +1353,6 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref) { struct btrfs_fs_info *fs_info = trans->fs_info; - int old_ref_mod, new_ref_mod; int ret; ASSERT(generic_ref->type != BTRFS_REF_NOT_SET && @@ -1397,17 +1361,12 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, generic_ref->tree_ref.root == BTRFS_TREE_LOG_OBJECTID); if (generic_ref->type == BTRFS_REF_METADATA) - ret = btrfs_add_delayed_tree_ref(trans, generic_ref, - NULL, &old_ref_mod, &new_ref_mod); + ret = btrfs_add_delayed_tree_ref(trans, generic_ref, NULL); else - ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0, - &old_ref_mod, &new_ref_mod); + ret = btrfs_add_delayed_data_ref(trans, generic_ref, 0); btrfs_ref_tree_mod(fs_info, generic_ref); - if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0) - sub_pinned_bytes(fs_info, generic_ref); - return ret; } @@ -1795,34 +1754,28 @@ void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, { int nr_items = 1; /* Dropping this ref head update. */ - if (head->total_ref_mod < 0) { - struct btrfs_space_info *space_info; - u64 flags; + /* + * We had csum deletions accounted for in our delayed refs rsv, we need + * to drop the csum leaves for this update from our delayed_refs_rsv. + */ + if (head->total_ref_mod < 0 && head->is_data) { + spin_lock(&delayed_refs->lock); + delayed_refs->pending_csums -= head->num_bytes; + spin_unlock(&delayed_refs->lock); + nr_items += btrfs_csum_bytes_to_leaves(fs_info, head->num_bytes); + } - if (head->is_data) - flags = BTRFS_BLOCK_GROUP_DATA; - else if (head->is_system) - flags = BTRFS_BLOCK_GROUP_SYSTEM; - else - flags = BTRFS_BLOCK_GROUP_METADATA; - space_info = btrfs_find_space_info(fs_info, flags); - ASSERT(space_info); - percpu_counter_add_batch(&space_info->total_bytes_pinned, - -head->num_bytes, - BTRFS_TOTAL_BYTES_PINNED_BATCH); + /* + * We were dropping refs, or had a new ref and dropped it, and thus must + * adjust down our total_bytes_pinned, the space may or may not have + * been pinned and so is accounted for properly in the pinned space by + * now. + */ + if (head->total_ref_mod < 0 || + (head->total_ref_mod == 0 && head->must_insert_reserved)) { + u64 flags = btrfs_ref_head_to_space_flags(head); - /* - * We had csum deletions accounted for in our delayed refs rsv, - * we need to drop the csum leaves for this update from our - * delayed_refs_rsv. - */ - if (head->is_data) { - spin_lock(&delayed_refs->lock); - delayed_refs->pending_csums -= head->num_bytes; - spin_unlock(&delayed_refs->lock); - nr_items += btrfs_csum_bytes_to_leaves(fs_info, - head->num_bytes); - } + btrfs_mod_total_bytes_pinned(fs_info, flags, -head->num_bytes); } btrfs_delayed_refs_rsv_release(fs_info, nr_items); @@ -2572,8 +2525,7 @@ static int pin_down_extent(struct btrfs_trans_handle *trans, spin_unlock(&cache->lock); spin_unlock(&cache->space_info->lock); - percpu_counter_add_batch(&cache->space_info->total_bytes_pinned, - num_bytes, BTRFS_TOTAL_BYTES_PINNED_BATCH); + __btrfs_mod_total_bytes_pinned(cache->space_info, num_bytes); set_extent_dirty(&trans->transaction->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); return 0; @@ -2784,8 +2736,7 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, cache->pinned -= len; btrfs_space_info_update_bytes_pinned(fs_info, space_info, -len); space_info->max_extent_size = 0; - percpu_counter_add_batch(&space_info->total_bytes_pinned, - -len, BTRFS_TOTAL_BYTES_PINNED_BATCH); + __btrfs_mod_total_bytes_pinned(space_info, -len); if (cache->ro) { space_info->bytes_readonly += len; readonly = true; @@ -3318,7 +3269,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_ref generic_ref = { 0 }; - int pin = 1; int ret; btrfs_init_generic_ref(&generic_ref, BTRFS_DROP_DELAYED_REF, @@ -3327,13 +3277,9 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, root->root_key.objectid); if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { - int old_ref_mod, new_ref_mod; - btrfs_ref_tree_mod(fs_info, &generic_ref); - ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL, - &old_ref_mod, &new_ref_mod); + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, NULL); BUG_ON(ret); /* -ENOMEM */ - pin = old_ref_mod >= 0 && new_ref_mod < 0; } if (last_ref && btrfs_header_generation(buf) == trans->transid) { @@ -3345,7 +3291,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, goto out; } - pin = 0; cache = btrfs_lookup_block_group(fs_info, buf->start); if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { @@ -3362,9 +3307,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, trace_btrfs_reserved_extent_free(fs_info, buf->start, buf->len); } out: - if (pin) - add_pinned_bytes(fs_info, &generic_ref); - if (last_ref) { /* * Deleting the buffer, clear the corrupt flag since it doesn't @@ -3378,7 +3320,6 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans, int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) { struct btrfs_fs_info *fs_info = trans->fs_info; - int old_ref_mod, new_ref_mod; int ret; if (btrfs_is_testing(fs_info)) @@ -3394,14 +3335,11 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID)) { /* unlocks the pinned mutex */ btrfs_pin_extent(trans, ref->bytenr, ref->len, 1); - old_ref_mod = new_ref_mod = 0; ret = 0; } else if (ref->type == BTRFS_REF_METADATA) { - ret = btrfs_add_delayed_tree_ref(trans, ref, NULL, - &old_ref_mod, &new_ref_mod); + ret = btrfs_add_delayed_tree_ref(trans, ref, NULL); } else { - ret = btrfs_add_delayed_data_ref(trans, ref, 0, - &old_ref_mod, &new_ref_mod); + ret = btrfs_add_delayed_data_ref(trans, ref, 0); } if (!((ref->type == BTRFS_REF_METADATA && @@ -3410,9 +3348,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref) ref->data_ref.ref_root == BTRFS_TREE_LOG_OBJECTID))) btrfs_ref_tree_mod(fs_info, ref); - if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0) - add_pinned_bytes(fs_info, ref); - return ret; } @@ -4528,7 +4463,6 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, struct btrfs_key *ins) { struct btrfs_ref generic_ref = { 0 }; - int ret; BUG_ON(root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID); @@ -4536,9 +4470,8 @@ int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, ins->objectid, ins->offset, 0); btrfs_init_data_ref(&generic_ref, root->root_key.objectid, owner, offset); btrfs_ref_tree_mod(root->fs_info, &generic_ref); - ret = btrfs_add_delayed_data_ref(trans, &generic_ref, - ram_bytes, NULL, NULL); - return ret; + + return btrfs_add_delayed_data_ref(trans, &generic_ref, ram_bytes); } /* @@ -4730,8 +4663,7 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, generic_ref.real_root = root->root_key.objectid; btrfs_init_tree_ref(&generic_ref, level, root_objectid); btrfs_ref_tree_mod(fs_info, &generic_ref); - ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, - extent_op, NULL, NULL); + ret = btrfs_add_delayed_tree_ref(trans, &generic_ref, extent_op); if (ret) goto out_free_delayed; } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0e41459b8de6..f851a1a63833 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -3264,8 +3264,11 @@ static int btrfs_zero_range(struct inode *inode, goto out; ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved, alloc_start, bytes_to_reserve); - if (ret) + if (ret) { + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, + lockend, &cached_state); goto out; + } ret = btrfs_prealloc_file_range(inode, mode, alloc_start, alloc_end - alloc_start, i_blocksize(inode), diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 4d8897879c9c..b64b88987367 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -775,8 +775,10 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, while (num_entries) { e = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); - if (!e) + if (!e) { + ret = -ENOMEM; goto free_cache; + } ret = io_ctl_read_entry(&io_ctl, e, &type); if (ret) { @@ -785,6 +787,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, } if (!e->bytes) { + ret = -1; kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; } @@ -805,6 +808,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, e->bitmap = kmem_cache_zalloc( btrfs_free_space_bitmap_cachep, GFP_NOFS); if (!e->bitmap) { + ret = -ENOMEM; kmem_cache_free( btrfs_free_space_cachep, e); goto free_cache; @@ -2704,8 +2708,10 @@ static void __btrfs_return_cluster_to_free_space( struct rb_node *node; spin_lock(&cluster->lock); - if (cluster->block_group != block_group) - goto out; + if (cluster->block_group != block_group) { + spin_unlock(&cluster->lock); + return; + } cluster->block_group = NULL; cluster->window_start = 0; @@ -2743,8 +2749,6 @@ static void __btrfs_return_cluster_to_free_space( entry->offset, &entry->offset_index, bitmap); } cluster->root = RB_ROOT; - -out: spin_unlock(&cluster->lock); btrfs_put_block_group(block_group); } @@ -3024,8 +3028,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, entry->bytes -= bytes; } - if (entry->bytes == 0) - rb_erase(&entry->offset_index, &cluster->root); break; } out: @@ -3042,7 +3044,10 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, ctl->free_space -= bytes; if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + + spin_lock(&cluster->lock); if (entry->bytes == 0) { + rb_erase(&entry->offset_index, &cluster->root); ctl->free_extents--; if (entry->bitmap) { kmem_cache_free(btrfs_free_space_bitmap_cachep, @@ -3055,6 +3060,7 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, kmem_cache_free(btrfs_free_space_cachep, entry); } + spin_unlock(&cluster->lock); spin_unlock(&ctl->tree_lock); return ret; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a8e0a6b038d3..fe723eadced7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2947,11 +2947,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, * @bio_offset: offset to the beginning of the bio (in bytes) * @page: page where is the data to be verified * @pgoff: offset inside the page + * @start: logical offset in the file * * The length of such check is always one sector size. */ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, - u32 bio_offset, struct page *page, u32 pgoff) + u32 bio_offset, struct page *page, u32 pgoff, + u64 start) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); @@ -2978,8 +2980,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff, - csum, csum_expected, io_bio->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, + io_bio->mirror_num); if (io_bio->device) btrfs_dev_stat_inc_and_print(io_bio->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); @@ -3032,7 +3034,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, pg_off += sectorsize, bio_offset += sectorsize) { int ret; - ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off); + ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off, + page_offset(page) + pg_off); if (ret < 0) return -EIO; } @@ -5916,7 +5919,7 @@ static int btrfs_dirty_inode(struct inode *inode) return PTR_ERR(trans); ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); - if (ret && ret == -ENOSPC) { + if (ret && (ret == -ENOSPC || ret == -EDQUOT)) { /* whoops, lets try again with the full transaction */ btrfs_end_transaction(trans); trans = btrfs_start_transaction(root, 1); @@ -7742,7 +7745,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, io_bio, - bio_offset, bvec.bv_page, pgoff))) { + bio_offset, bvec.bv_page, + pgoff, start))) { clean_io_failure(fs_info, failure_tree, io_tree, start, bvec.bv_page, btrfs_ino(BTRFS_I(inode)), @@ -8186,8 +8190,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset, if (!inode_evicting) lock_extent_bits(tree, page_start, page_end, &cached_state); -again: + start = page_start; +again: ordered = btrfs_lookup_ordered_range(inode, start, page_end - start + 1); if (ordered) { found_ordered = true; @@ -8805,7 +8810,7 @@ int __init btrfs_init_cachep(void) btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap", PAGE_SIZE, PAGE_SIZE, - SLAB_RED_ZONE, NULL); + SLAB_MEM_SPREAD, NULL); if (!btrfs_free_space_bitmap_cachep) goto fail; @@ -9992,6 +9997,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, sp->ptr = ptr; sp->inode = inode; sp->is_block_group = is_block_group; + sp->bg_extent_count = 1; spin_lock(&fs_info->swapfile_pins_lock); p = &fs_info->swapfile_pins.rb_node; @@ -10005,6 +10011,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr, (sp->ptr == entry->ptr && sp->inode > entry->inode)) { p = &(*p)->rb_right; } else { + if (is_block_group) + entry->bg_extent_count++; spin_unlock(&fs_info->swapfile_pins_lock); kfree(sp); return 1; @@ -10030,8 +10038,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode) sp = rb_entry(node, struct btrfs_swapfile_pin, node); if (sp->inode == inode) { rb_erase(&sp->node, &fs_info->swapfile_pins); - if (sp->is_block_group) + if (sp->is_block_group) { + btrfs_dec_block_group_swap_extents(sp->ptr, + sp->bg_extent_count); btrfs_put_block_group(sp->ptr); + } kfree(sp); } node = next; @@ -10092,7 +10103,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { struct inode *inode = file_inode(file); - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_root *root = BTRFS_I(inode)->root; + struct btrfs_fs_info *fs_info = root->fs_info; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct extent_state *cached_state = NULL; struct extent_map *em = NULL; @@ -10143,13 +10155,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, "cannot activate swapfile while exclusive operation is running"); return -EBUSY; } + + /* + * Prevent snapshot creation while we are activating the swap file. + * We do not want to race with snapshot creation. If snapshot creation + * already started before we bumped nr_swapfiles from 0 to 1 and + * completes before the first write into the swap file after it is + * activated, than that write would fallback to COW. + */ + if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) { + btrfs_exclop_finish(fs_info); + btrfs_warn(fs_info, + "cannot activate swapfile because snapshot creation is in progress"); + return -EINVAL; + } /* * Snapshots can create extents which require COW even if NODATACOW is * set. We use this counter to prevent snapshots. We must increment it * before walking the extents because we don't want a concurrent * snapshot to run after we've already checked the extents. */ - atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles); + atomic_inc(&root->nr_swapfiles); isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize); @@ -10246,6 +10272,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, goto out; } + if (!btrfs_inc_block_group_swap_extents(bg)) { + btrfs_warn(fs_info, + "block group for swapfile at %llu is read-only%s", + bg->start, + atomic_read(&fs_info->scrubs_running) ? + " (scrub running)" : ""); + btrfs_put_block_group(bg); + ret = -EINVAL; + goto out; + } + ret = btrfs_add_swapfile_pin(inode, bg, true); if (ret) { btrfs_put_block_group(bg); @@ -10284,6 +10321,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file, if (ret) btrfs_swap_deactivate(file); + btrfs_drew_write_unlock(&root->snapshot_lock); + btrfs_exclop_finish(fs_info); if (ret) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index dde49a791f3e..0a4ab121c684 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1926,7 +1926,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, if (vol_args->flags & BTRFS_SUBVOL_RDONLY) readonly = true; if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { - if (vol_args->size > PAGE_SIZE) { + u64 nums; + + if (vol_args->size < sizeof(*inherit) || + vol_args->size > PAGE_SIZE) { ret = -EINVAL; goto free_args; } @@ -1935,6 +1938,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, ret = PTR_ERR(inherit); goto free_args; } + + if (inherit->num_qgroups > PAGE_SIZE || + inherit->num_ref_copies > PAGE_SIZE || + inherit->num_excl_copies > PAGE_SIZE) { + ret = -EINVAL; + goto free_inherit; + } + + nums = inherit->num_qgroups + 2 * inherit->num_ref_copies + + 2 * inherit->num_excl_copies; + if (vol_args->size != struct_size(inherit, qgroups, nums)) { + ret = -EINVAL; + goto free_inherit; + } } ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 808370ada888..f0b9ef13153a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -226,7 +226,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, { struct btrfs_qgroup_list *list; - btrfs_sysfs_del_one_qgroup(fs_info, qgroup); list_del(&qgroup->dirty); while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, @@ -243,7 +242,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, list_del(&list->next_member); kfree(list); } - kfree(qgroup); } /* must be called with qgroup_lock held */ @@ -569,6 +567,8 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); __del_qgroup_rb(fs_info, qgroup); + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); } /* * We call btrfs_free_qgroup_config() when unmounting @@ -1578,6 +1578,14 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(fs_info, qgroupid); spin_unlock(&fs_info->qgroup_lock); + + /* + * Remove the qgroup from sysfs now without holding the qgroup_lock + * spinlock, since the sysfs_remove_group() function needs to take + * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). + */ + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; @@ -3841,8 +3849,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes, return num_bytes; } -static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, - enum btrfs_qgroup_rsv_type type, bool enforce) +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce) { struct btrfs_fs_info *fs_info = root->fs_info; int ret; @@ -3873,14 +3881,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, { int ret; - ret = qgroup_reserve_meta(root, num_bytes, type, enforce); + ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); if (ret <= 0 && ret != -EDQUOT) return ret; ret = try_flush_qgroup(root); if (ret < 0) return ret; - return qgroup_reserve_meta(root, num_bytes, type, enforce); + return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce); } void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root) diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h index 50dea9a2d8fb..7283e4f549af 100644 --- a/fs/btrfs/qgroup.h +++ b/fs/btrfs/qgroup.h @@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len); int btrfs_qgroup_free_data(struct btrfs_inode *inode, struct extent_changeset *reserved, u64 start, u64 len); +int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, + enum btrfs_qgroup_rsv_type type, bool enforce); int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes, enum btrfs_qgroup_rsv_type type, bool enforce); /* Reserve metadata space for pertrans and prealloc type */ diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 93fbf87bdc8d..123b79672c63 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2363,16 +2363,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, SetPageUptodate(p_page); if (has_qstripe) { + /* RAID6, allocate and map temp space for the Q stripe */ q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); if (!q_page) { __free_page(p_page); goto cleanup; } SetPageUptodate(q_page); + pointers[rbio->real_stripes - 1] = kmap(q_page); } atomic_set(&rbio->error, 0); + /* Map the parity stripe just once */ + pointers[nr_data] = kmap(p_page); + for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { struct page *p; void *parity; @@ -2382,16 +2387,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, pointers[stripe] = kmap(p); } - /* then add the parity stripe */ - pointers[stripe++] = kmap(p_page); - if (has_qstripe) { - /* - * raid6, add the qstripe and call the - * library function to fill in our p/q - */ - pointers[stripe++] = kmap(q_page); - + /* RAID6, call the library function to fill in our P/Q */ raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, pointers); } else { @@ -2412,12 +2409,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); - kunmap(p_page); } + kunmap(p_page); __free_page(p_page); - if (q_page) + if (q_page) { + kunmap(q_page); __free_page(q_page); + } writeback: /* diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index b03e7891394e..a3bc721bab7c 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -550,6 +550,24 @@ static int btrfs_clone(struct inode *src, struct inode *inode, */ btrfs_release_path(path); + /* + * When using NO_HOLES and we are cloning a range that covers + * only a hole (no extents) into a range beyond the current + * i_size, punching a hole in the target range will not create + * an extent map defining a hole, because the range starts at or + * beyond current i_size. If the file previously had an i_size + * greater than the new i_size set by this clone operation, we + * need to make sure the next fsync is a full fsync, so that it + * detects and logs a hole covering a range from the current + * i_size to the new i_size. If the clone range covers extents, + * besides a hole, then we know the full sync flag was already + * set by previous calls to btrfs_replace_file_extents() that + * replaced file extent items. + */ + if (last_dest_end >= i_size_read(inode)) + set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, + &BTRFS_I(inode)->runtime_flags); + ret = btrfs_replace_file_extents(inode, path, last_dest_end, destoff + len - 1, NULL, &trans); if (ret) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index df63ef64c5c0..c01e0d7bef2c 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -668,9 +668,7 @@ static void __del_reloc_root(struct btrfs_root *root) RB_CLEAR_NODE(&node->rb_node); } spin_unlock(&rc->reloc_root_tree.lock); - if (!node) - return; - BUG_ON((struct btrfs_root *)node->data != root); + ASSERT(!node || (struct btrfs_root *)node->data == root); } /* diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 5f4f88a4d2c8..c09a494be8c6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3630,6 +3630,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, * commit_transactions. */ ro_set = 0; + } else if (ret == -ETXTBSY) { + btrfs_warn(fs_info, + "skipping scrub of block group %llu due to active swapfile", + cache->start); + scrub_pause_off(fs_info); + ret = 0; + goto skip_unfreeze; } else { btrfs_warn(fs_info, "failed setting block group ro: %d", ret); @@ -3719,7 +3726,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, } else { spin_unlock(&cache->lock); } - +skip_unfreeze: btrfs_unfreeze_block_group(cache); btrfs_put_block_group(cache); if (ret) diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 5646393b928c..74706f604bce 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -152,4 +152,21 @@ static inline void btrfs_space_info_free_bytes_may_use( int btrfs_reserve_data_bytes(struct btrfs_fs_info *fs_info, u64 bytes, enum btrfs_reserve_flush_enum flush); +static inline void __btrfs_mod_total_bytes_pinned( + struct btrfs_space_info *space_info, + s64 mod) +{ + percpu_counter_add_batch(&space_info->total_bytes_pinned, mod, + BTRFS_TOTAL_BYTES_PINNED_BATCH); +} + +static inline void btrfs_mod_total_bytes_pinned(struct btrfs_fs_info *fs_info, + u64 flags, s64 mod) +{ + struct btrfs_space_info *space_info = btrfs_find_space_info(fs_info, flags); + + ASSERT(space_info); + __btrfs_mod_total_bytes_pinned(space_info, mod); +} + #endif /* BTRFS_SPACE_INFO_H */ diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 12d7d3be7cd4..8baa806f43d7 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1919,8 +1919,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) btrfs_resize_thread_pool(fs_info, fs_info->thread_pool_size, old_thread_pool_size); - if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) != - btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && + if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != + (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { btrfs_warn(fs_info, "remount supports changing free space tree only from ro to rw"); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 6af7f2bf92de..fbf93067642a 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1319,7 +1319,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) struct btrfs_root *gang[8]; int i; int ret; - int err = 0; spin_lock(&fs_info->fs_roots_radix_lock); while (1) { @@ -1331,6 +1330,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) break; for (i = 0; i < ret; i++) { struct btrfs_root *root = gang[i]; + int ret2; + radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); @@ -1350,17 +1351,17 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) root->node); } - err = btrfs_update_root(trans, fs_info->tree_root, + ret2 = btrfs_update_root(trans, fs_info->tree_root, &root->root_key, &root->root_item); + if (ret2) + return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - if (err) - break; btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); - return err; + return 0; } /* diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 582061c7b547..f4ade821307d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { - u64 root_objectid; - u64 owner; u64 offset; - u64 hash; + /* + * We cannot check the extent_data_ref hash due to possible + * overflow from the leaf due to hash collisions. + */ dref = (struct btrfs_extent_data_ref *)ptr; - root_objectid = btrfs_extent_data_ref_root(leaf, dref); - owner = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); - hash = hash_extent_data_ref(root_objectid, owner, offset); - if (unlikely(hash != key->offset)) { - extent_err(leaf, slot, - "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx", - hash, key->offset); - return -EUCLEAN; - } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d6c24c8ad749..93f2b030fb9d 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7282,6 +7282,9 @@ static int btrfs_device_init_dev_stats(struct btrfs_device *device, int item_size; int i, ret, slot; + if (!device->fs_info->dev_root) + return 0; + key.objectid = BTRFS_DEV_STATS_OBJECTID; key.type = BTRFS_PERSISTENT_ITEM_KEY; key.offset = device->devid; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index af6246f36a9e..03135dbb318a 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_trans_handle *trans; + const bool start_trans = (current->journal_info == NULL); int ret; - trans = btrfs_start_transaction(root, 2); - if (IS_ERR(trans)) - return PTR_ERR(trans); + if (start_trans) { + /* + * 1 unit for inserting/updating/deleting the xattr + * 1 unit for the inode item update + */ + trans = btrfs_start_transaction(root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + } else { + /* + * This can happen when smack is enabled and a directory is being + * created. It happens through d_instantiate_new(), which calls + * smack_d_instantiate(), which in turn calls __vfs_setxattr() to + * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the + * inode. We have already reserved space for the xattr and inode + * update at btrfs_mkdir(), so just use the transaction handle. + * We don't join or start a transaction, as that will reset the + * block_rsv of the handle and trigger a warning for the start + * case. + */ + ASSERT(strncmp(name, XATTR_SECURITY_PREFIX, + XATTR_SECURITY_PREFIX_LEN) == 0); + trans = current->journal_info; + } ret = btrfs_setxattr(trans, inode, name, value, size, flags); if (ret) @@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name, ret = btrfs_update_inode(trans, root, BTRFS_I(inode)); BUG_ON(ret); out: - btrfs_end_transaction(trans); + if (start_trans) + btrfs_end_transaction(trans); return ret; } diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index c38846659019..2f80de440359 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -152,7 +152,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) sector_t sector = 0; struct blk_zone *zones = NULL; unsigned int i, nreported = 0, nr_zones; - unsigned int zone_sectors; + sector_t zone_sectors; int ret; if (!bdev_is_zoned(bdev)) @@ -485,7 +485,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, u64 *bytenr_ret) { struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES]; - unsigned int zone_sectors; + sector_t zone_sectors; u32 sb_zone; int ret; u64 zone_size; diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c index 9a4871636c6c..c8cf690013f3 100644 --- a/fs/btrfs/zstd.c +++ b/fs/btrfs/zstd.c @@ -28,10 +28,10 @@ /* 307s to avoid pathologically clashing with transaction commit */ #define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ) -static ZSTD_parameters zstd_get_btrfs_parameters(unsigned int level, +static zstd_parameters zstd_get_btrfs_parameters(unsigned int level, size_t src_len) { - ZSTD_parameters params = ZSTD_getParams(level, src_len, 0); + zstd_parameters params = zstd_get_params(level, src_len); if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; @@ -48,8 +48,8 @@ struct workspace { unsigned long last_used; /* jiffies */ struct list_head list; struct list_head lru_list; - ZSTD_inBuffer in_buf; - ZSTD_outBuffer out_buf; + zstd_in_buffer in_buf; + zstd_out_buffer out_buf; }; /* @@ -155,12 +155,12 @@ static void zstd_calc_ws_mem_sizes(void) unsigned int level; for (level = 1; level <= ZSTD_BTRFS_MAX_LEVEL; level++) { - ZSTD_parameters params = + zstd_parameters params = zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT); size_t level_size = max_t(size_t, - ZSTD_CStreamWorkspaceBound(params.cParams), - ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); + zstd_cstream_workspace_bound(¶ms.cParams), + zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT)); max_size = max_t(size_t, max_size, level_size); zstd_ws_mem_sizes[level - 1] = max_size; @@ -371,7 +371,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long *total_in, unsigned long *total_out) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_CStream *stream; + zstd_cstream *stream; int ret = 0; int nr_pages = 0; struct page *in_page = NULL; /* The current page to read */ @@ -381,7 +381,7 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, unsigned long len = *total_out; const unsigned long nr_dest_pages = *out_pages; unsigned long max_out = nr_dest_pages * PAGE_SIZE; - ZSTD_parameters params = zstd_get_btrfs_parameters(workspace->req_level, + zstd_parameters params = zstd_get_btrfs_parameters(workspace->req_level, len); *out_pages = 0; @@ -389,10 +389,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, *total_in = 0; /* Initialize the stream */ - stream = ZSTD_initCStream(params, len, workspace->mem, + stream = zstd_init_cstream(¶ms, len, workspace->mem, workspace->size); if (!stream) { - pr_warn("BTRFS: ZSTD_initCStream failed\n"); + pr_warn("BTRFS: zstd_init_cstream failed\n"); ret = -EIO; goto out; } @@ -418,11 +418,11 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, while (1) { size_t ret2; - ret2 = ZSTD_compressStream(stream, &workspace->out_buf, + ret2 = zstd_compress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_compressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_compress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto out; } @@ -487,10 +487,10 @@ int zstd_compress_pages(struct list_head *ws, struct address_space *mapping, while (1) { size_t ret2; - ret2 = ZSTD_endStream(stream, &workspace->out_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_endStream returned %d\n", - ZSTD_getErrorCode(ret2)); + ret2 = zstd_end_stream(stream, &workspace->out_buf); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_end_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto out; } @@ -550,17 +550,17 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) u64 disk_start = cb->start; struct bio *orig_bio = cb->orig_bio; size_t srclen = cb->compressed_len; - ZSTD_DStream *stream; + zstd_dstream *stream; int ret = 0; unsigned long page_in_index = 0; unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); unsigned long buf_start; unsigned long total_out = 0; - stream = ZSTD_initDStream( + stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { - pr_debug("BTRFS: ZSTD_initDStream failed\n"); + pr_debug("BTRFS: zstd_init_dstream failed\n"); ret = -EIO; goto done; } @@ -576,11 +576,11 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) while (1) { size_t ret2; - ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_decompress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto done; } @@ -626,17 +626,17 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); - ZSTD_DStream *stream; + zstd_dstream *stream; int ret = 0; size_t ret2; unsigned long total_out = 0; unsigned long pg_offset = 0; char *kaddr; - stream = ZSTD_initDStream( + stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (!stream) { - pr_warn("BTRFS: ZSTD_initDStream failed\n"); + pr_warn("BTRFS: zstd_init_dstream failed\n"); ret = -EIO; goto finish; } @@ -660,15 +660,15 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in, /* Check if the frame is over and we still need more input */ if (ret2 == 0) { - pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); + pr_debug("BTRFS: zstd_decompress_stream ended early\n"); ret = -EIO; goto finish; } - ret2 = ZSTD_decompressStream(stream, &workspace->out_buf, + ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); - if (ZSTD_isError(ret2)) { - pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", - ZSTD_getErrorCode(ret2)); + if (zstd_is_error(ret2)) { + pr_debug("BTRFS: zstd_decompress_stream returned %d\n", + zstd_get_error_code(ret2)); ret = -EIO; goto finish; } diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index e027c718ca01..8ffc40e84a59 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -24,17 +24,16 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; struct fscache_retrieval *op = monitor->op; - struct wait_bit_key *key = _key; + struct wait_page_key *key = _key; struct page *page = wait->private; ASSERT(key); _enter("{%lu},%u,%d,{%p,%u}", monitor->netfs_page->index, mode, sync, - key->flags, key->bit_nr); + key->page, key->bit_nr); - if (key->flags != &page->flags || - key->bit_nr != PG_locked) + if (key->page != page || key->bit_nr != PG_locked) return 0; _debug("--- monitor %p %lx ---", page, page->flags); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 255a512f1277..638d18c198ea 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -3093,10 +3093,12 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had, dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had), last ? " last" : "", put ? " put" : ""); - if (last && !skip_checking_caps) - ceph_check_caps(ci, 0, NULL); - else if (flushsnaps) - ceph_flush_snaps(ci, NULL); + if (!skip_checking_caps) { + if (last) + ceph_check_caps(ci, 0, NULL); + else if (flushsnaps) + ceph_flush_snaps(ci, NULL); + } if (wake) wake_up_all(&ci->i_cap_wq); while (put-- > 0) diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c index d35f599aa00e..f2d730fffccb 100644 --- a/fs/cifs/cifs_swn.c +++ b/fs/cifs/cifs_swn.c @@ -272,7 +272,7 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon) if (IS_ERR(share_name)) { int ret; - ret = PTR_ERR(net_name); + ret = PTR_ERR(share_name); cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n", __func__, tcon->treeName, ret); kfree(net_name); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index ab883e84e116..8a6a1772590b 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -290,7 +290,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) rc = server->ops->queryfs(xid, tcon, cifs_sb, buf); free_xid(xid); - return 0; + return rc; } static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 50fcb65920e8..ca1af03e9370 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -256,7 +256,7 @@ struct smb_version_operations { /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); - int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *); + int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, unsigned int epoch, bool *purge_cache); @@ -915,8 +915,8 @@ struct cifs_ses { bool binding:1; /* are we binding the session? */ __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; - __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; - __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; + __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; + __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; @@ -1701,10 +1701,11 @@ static inline bool is_retryable_error(int error) #define CIFS_NO_RSP_BUF 0x040 /* no response buffer required */ /* Type of request operation */ -#define CIFS_ECHO_OP 0x080 /* echo request */ -#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ -#define CIFS_NEG_OP 0x0200 /* negotiate request */ -#define CIFS_OP_MASK 0x0380 /* mask request type */ +#define CIFS_ECHO_OP 0x080 /* echo request */ +#define CIFS_OBREAK_OP 0x0100 /* oplock break request */ +#define CIFS_NEG_OP 0x0200 /* negotiate request */ +#define CIFS_CP_CREATE_CLOSE_OP 0x0400 /* compound create+close request */ +#define CIFS_OP_MASK 0x0780 /* mask request type */ #define CIFS_HAS_CREDITS 0x0400 /* already has credits */ #define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 64fe5a47b5e8..9adc74bd9f8f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -147,6 +147,11 @@ */ #define SMB3_SIGN_KEY_SIZE (16) +/* + * Size of the smb3 encryption/decryption keys + */ +#define SMB3_ENC_DEC_KEY_SIZE (32) + #define CIFS_CLIENT_CHALLENGE_SIZE (8) #define CIFS_SERVER_CHALLENGE_SIZE (8) #define CIFS_HMAC_MD5_HASH_SIZE (16) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4bb9decbbf27..70d0f0388af4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1405,6 +1405,11 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx) tcp_ses->min_offload = ctx->min_offload; tcp_ses->tcpStatus = CifsNeedNegotiate; + if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) + tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE; + else + tcp_ses->max_credits = ctx->max_credits; + tcp_ses->nr_targets = 1; tcp_ses->ignore_signature = ctx->ignore_signature; /* thread spawned, put it on the list */ @@ -2806,11 +2811,6 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif *nserver = server; - if ((ctx->max_credits < 20) || (ctx->max_credits > 60000)) - server->max_credits = SMB2_MAX_CREDITS_AVAILABLE; - else - server->max_credits = ctx->max_credits; - /* get a reference to a SMB session */ ses = cifs_get_smb_ses(server, ctx); if (IS_ERR(ses)) { @@ -3038,96 +3038,91 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it, return 0; } -static int setup_dfs_tgt_conn(const char *path, const char *full_path, - const struct dfs_cache_tgt_iterator *tgt_it, - struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, - unsigned int *xid, struct TCP_Server_Info **server, - struct cifs_ses **ses, struct cifs_tcon **tcon) -{ - int rc; - struct dfs_info3_param ref = {0}; - char *mdata = NULL; - struct smb3_fs_context fake_ctx = {NULL}; - char *fake_devname = NULL; - - cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path); - - rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref); - if (rc) - return rc; - - mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - full_path + 1, &ref, - &fake_devname); - free_dfs_info_param(&ref); - - if (IS_ERR(mdata)) { - rc = PTR_ERR(mdata); - mdata = NULL; - } else - rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname); - - kfree(mdata); - kfree(fake_devname); - - if (!rc) { - /* - * We use a 'fake_ctx' here because we need pass it down to the - * mount_{get,put} functions to test connection against new DFS - * targets. - */ - mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); - rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses, - tcon); - if (!rc || (*server && *ses)) { - /* - * We were able to connect to new target server. - * Update current context with new target server. - */ - rc = update_vol_info(tgt_it, &fake_ctx, ctx); - } - } - smb3_cleanup_fs_context_contents(&fake_ctx); - return rc; -} - static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, struct cifs_ses *root_ses, unsigned int *xid, struct TCP_Server_Info **server, struct cifs_ses **ses, struct cifs_tcon **tcon) { int rc; - struct dfs_cache_tgt_list tgt_list; + struct dfs_cache_tgt_list tgt_list = {0}; struct dfs_cache_tgt_iterator *tgt_it = NULL; + struct smb3_fs_context tmp_ctx = {NULL}; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) return -EOPNOTSUPP; + cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path); + rc = dfs_cache_noreq_find(path, NULL, &tgt_list); if (rc) return rc; + /* + * We use a 'tmp_ctx' here because we need pass it down to the mount_{get,put} functions to + * test connection against new DFS targets. + */ + rc = smb3_fs_context_dup(&tmp_ctx, ctx); + if (rc) + goto out; for (;;) { + struct dfs_info3_param ref = {0}; + char *fake_devname = NULL, *mdata = NULL; + /* Get next DFS target server - if any */ rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it); if (rc) break; - /* Connect to next DFS target */ - rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses, - tcon); - if (!rc || (*server && *ses)) + + rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref); + if (rc) + break; + + cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, + tmp_ctx.prepath); + + mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref, + &fake_devname); + free_dfs_info_param(&ref); + + if (IS_ERR(mdata)) { + rc = PTR_ERR(mdata); + mdata = NULL; + } else + rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname); + + kfree(mdata); + kfree(fake_devname); + + if (rc) + break; + + cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, + tmp_ctx.prepath); + + mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon); + rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon); + if (!rc || (*server && *ses)) { + /* + * We were able to connect to new target server. Update current context with + * new target server. + */ + rc = update_vol_info(tgt_it, &tmp_ctx, ctx); break; + } } if (!rc) { + cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC, + tmp_ctx.prepath); /* - * Update DFS target hint in DFS referral cache with the target - * server we successfully reconnected to. + * Update DFS target hint in DFS referral cache with the target server we + * successfully reconnected to. */ - rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, - cifs_sb->local_nls, - cifs_remap(cifs_sb), path, - tgt_it); + rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls, + cifs_remap(cifs_sb), path, tgt_it); } + +out: + smb3_cleanup_fs_context_contents(&tmp_ctx); dfs_cache_free_tgts(&tgt_list); return rc; } @@ -3285,77 +3280,77 @@ static void put_root_ses(struct cifs_ses *ses) cifs_put_smb_ses(ses); } -/* Check if a path component is remote and then update @dfs_path accordingly */ -static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, - const unsigned int xid, struct TCP_Server_Info *server, - struct cifs_tcon *tcon, char **dfs_path) +/* Set up next dfs prefix path in @dfs_path */ +static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx, + const unsigned int xid, struct TCP_Server_Info *server, + struct cifs_tcon *tcon, char **dfs_path) { - char *path, *s; - char sep = CIFS_DIR_SEP(cifs_sb), tmp; - char *npath; - int rc = 0; - int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS; - int skip = added_treename; + char *path, *npath; + int added_treename = is_tcon_dfs(tcon); + int rc; path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename); if (!path) return -ENOMEM; - /* - * Walk through the path components in @path and check if they're accessible. In case any of - * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path - * (NOT including the remaining components). - */ - s = path; - do { - /* skip separators */ - while (*s && *s == sep) - s++; - if (!*s) - break; - /* next separator */ - while (*s && *s != sep) - s++; - /* - * if the treename is added, we then have to skip the first - * part within the separators - */ - if (skip) { - skip = 0; - continue; + rc = is_path_remote(cifs_sb, ctx, xid, server, tcon); + if (rc == -EREMOTE) { + struct smb3_fs_context v = {NULL}; + /* if @path contains a tree name, skip it in the prefix path */ + if (added_treename) { + rc = smb3_parse_devname(path, &v); + if (rc) + goto out; + npath = build_unc_path_to_root(&v, cifs_sb, true); + smb3_cleanup_fs_context_contents(&v); + } else { + v.UNC = ctx->UNC; + v.prepath = path + 1; + npath = build_unc_path_to_root(&v, cifs_sb, true); } - tmp = *s; - *s = 0; - rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path); - if (rc && rc == -EREMOTE) { - struct smb3_fs_context v = {NULL}; - /* if @path contains a tree name, skip it in the prefix path */ - if (added_treename) { - rc = smb3_parse_devname(path, &v); - if (rc) - break; - rc = -EREMOTE; - npath = build_unc_path_to_root(&v, cifs_sb, true); - smb3_cleanup_fs_context_contents(&v); - } else { - v.UNC = ctx->UNC; - v.prepath = path + 1; - npath = build_unc_path_to_root(&v, cifs_sb, true); - } - if (IS_ERR(npath)) { - rc = PTR_ERR(npath); - break; - } - kfree(*dfs_path); - *dfs_path = npath; + + if (IS_ERR(npath)) { + rc = PTR_ERR(npath); + goto out; } - *s = tmp; - } while (rc == 0); + kfree(*dfs_path); + *dfs_path = npath; + rc = -EREMOTE; + } + +out: kfree(path); return rc; } +/* Check if resolved targets can handle any DFS referrals */ +static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server) +{ + int rc; + struct dfs_info3_param ref = {0}; + + if (is_tcon_dfs(tcon)) { + *ref_server = true; + } else { + cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path); + + rc = dfs_cache_noreq_find(ref_path, &ref, NULL); + if (rc) { + cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc); + return rc; + } + cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags); + /* + * Check if all targets are capable of handling DFS referrals as per + * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL. + */ + *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER); + free_dfs_info_param(&ref); + } + return 0; +} + int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { int rc = 0; @@ -3367,18 +3362,19 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) char *ref_path = NULL, *full_path = NULL; char *oldmnt = NULL; char *mntdata = NULL; + bool ref_server = false; rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon); /* - * Unconditionally try to get an DFS referral (even cached) to determine whether it is an - * DFS mount. + * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally + * try to get an DFS referral (even cached) to determine whether it is an DFS mount. * * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem * to respond with PATH_NOT_COVERED to requests that include the prefix. */ - if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL, + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || + dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL, NULL)) { - /* No DFS referral was returned. Looks like a regular share. */ if (rc) goto error; /* Check if it is fully accessible and then mount it */ @@ -3432,13 +3428,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) break; if (!tcon) continue; + /* Make sure that requests go through new root servers */ - if (is_tcon_dfs(tcon)) { + rc = is_referral_server(ref_path + 1, tcon, &ref_server); + if (rc) + break; + if (ref_server) { put_root_ses(root_ses); set_root_ses(cifs_sb, ses, &root_ses); } - /* Check for remaining path components and then continue chasing them (-EREMOTE) */ - rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path); + + /* Get next dfs path and then continue chasing them if -EREMOTE */ + rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path); /* Prevent recursion on broken link referrals */ if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS) rc = -ELOOP; diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index 4950ab0486ae..098b4bc8da59 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -37,11 +37,12 @@ struct cache_dfs_tgt { struct cache_entry { struct hlist_node hlist; const char *path; - int ttl; - int srvtype; - int flags; + int hdr_flags; /* RESP_GET_DFS_REFERRAL.ReferralHeaderFlags */ + int ttl; /* DFS_REREFERRAL_V3.TimeToLive */ + int srvtype; /* DFS_REREFERRAL_V3.ServerType */ + int ref_flags; /* DFS_REREFERRAL_V3.ReferralEntryFlags */ struct timespec64 etime; - int path_consumed; + int path_consumed; /* RESP_GET_DFS_REFERRAL.PathConsumed */ int numtgts; struct list_head tlist; struct cache_dfs_tgt *tgthint; @@ -166,14 +167,11 @@ static int dfscache_proc_show(struct seq_file *m, void *v) continue; seq_printf(m, - "cache entry: path=%s,type=%s,ttl=%d,etime=%ld," - "interlink=%s,path_consumed=%d,expired=%s\n", - ce->path, - ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", - ce->ttl, ce->etime.tv_nsec, - IS_INTERLINK_SET(ce->flags) ? "yes" : "no", - ce->path_consumed, - cache_entry_expired(ce) ? "yes" : "no"); + "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n", + ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", + ce->ttl, ce->etime.tv_nsec, ce->ref_flags, ce->hdr_flags, + IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no", + ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no"); list_for_each_entry(t, &ce->tlist, list) { seq_printf(m, " %s%s\n", @@ -236,11 +234,12 @@ static inline void dump_tgts(const struct cache_entry *ce) static inline void dump_ce(const struct cache_entry *ce) { - cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,interlink=%s,path_consumed=%d,expired=%s\n", + cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n", ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl, ce->etime.tv_nsec, - IS_INTERLINK_SET(ce->flags) ? "yes" : "no", + ce->hdr_flags, ce->ref_flags, + IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no", ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no"); dump_tgts(ce); @@ -381,7 +380,8 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs, ce->ttl = refs[0].ttl; ce->etime = get_expire_time(ce->ttl); ce->srvtype = refs[0].server_type; - ce->flags = refs[0].ref_flag; + ce->hdr_flags = refs[0].flags; + ce->ref_flags = refs[0].ref_flag; ce->path_consumed = refs[0].path_consumed; for (i = 0; i < numrefs; i++) { @@ -799,7 +799,8 @@ static int setup_referral(const char *path, struct cache_entry *ce, ref->path_consumed = ce->path_consumed; ref->ttl = ce->ttl; ref->server_type = ce->srvtype; - ref->ref_flag = ce->flags; + ref->ref_flag = ce->ref_flags; + ref->flags = ce->hdr_flags; return 0; diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 12a5da0230b5..3a26ad47b220 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -542,20 +542,37 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc, /* BB Need to add support for sep= here TBD */ while ((key = strsep(&options, ",")) != NULL) { - if (*key) { - size_t v_len = 0; - char *value = strchr(key, '='); - - if (value) { - if (value == key) - continue; - *value++ = 0; - v_len = strlen(value); - } - ret = vfs_parse_fs_string(fc, key, value, v_len); - if (ret < 0) - break; + size_t len; + char *value; + + if (*key == 0) + break; + + /* Check if following character is the deliminator If yes, + * we have encountered a double deliminator reset the NULL + * character to the deliminator + */ + while (options && options[0] == ',') { + len = strlen(key); + strcpy(key + len, options); + options = strchr(options, ','); + if (options) + *options++ = 0; } + + + len = 0; + value = strchr(key, '='); + if (value) { + if (value == key) + continue; + *value++ = 0; + len = strlen(value); + } + + ret = vfs_parse_fs_string(fc, key, value, len); + if (ret < 0) + break; } return ret; @@ -1158,9 +1175,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, pr_warn_once("Witness protocol support is experimental\n"); break; case Opt_rootfs: -#ifdef CONFIG_CIFS_ROOT - ctx->rootfs = true; +#ifndef CONFIG_CIFS_ROOT + cifs_dbg(VFS, "rootfs support requires CONFIG_CIFS_ROOT config option\n"); + goto cifs_parse_mount_err; #endif + ctx->rootfs = true; break; case Opt_posixpaths: if (result.negated) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a83b3a8ffaac..cbff8a7e36a9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -2383,7 +2383,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat, * We need to be sure that all dirty pages are written and the server * has actual ctime, mtime and file length. */ - if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE)) && + if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE | STATX_BLOCKS)) && !CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping && inode->i_mapping->nrpages != 0) { rc = filemap_fdatawait(inode->i_mapping); @@ -2573,6 +2573,14 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, if (rc == 0) { cifsInode->server_eof = attrs->ia_size; cifs_setsize(inode, attrs->ia_size); + /* + * i_blocks is not related to (i_size / i_blksize), but instead + * 512 byte (2**9) size is required for calculating num blocks. + * Until we can query the server for actual allocation size, + * this is best estimate we have for blocks allocated for a file + * Number of blocks must be rounded up so size 1 is not 0 blocks + */ + inode->i_blocks = (512 - 1 + attrs->ia_size) >> 9; /* * The man page of truncate says if the size changed, diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 213465718fa8..dea4959989b5 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -230,6 +230,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, ctx.noautotune = ses->server->noautotune; ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay; ctx.echo_interval = ses->server->echo_interval / HZ; + ctx.max_credits = ses->server->max_credits; /* * This will be used for encoding/decoding user/domain/pw diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 99a1951a01ec..d9a990c99121 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -58,6 +58,7 @@ #define SMB2_HMACSHA256_SIZE (32) #define SMB2_CMACAES_SIZE (16) #define SMB3_SIGNKEY_SIZE (16) +#define SMB3_GCM128_CRYPTKEY_SIZE (16) #define SMB3_GCM256_CRYPTKEY_SIZE (32) /* Maximum buffer size value we can send with 1 credit */ diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c index 1f900b81c34a..a718dc77e604 100644 --- a/fs/cifs/smb2inode.c +++ b/fs/cifs/smb2inode.c @@ -358,6 +358,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon, if (cfile) goto after_close; /* Close */ + flags |= CIFS_CP_CREATE_CLOSE_OP; rqst[num_rqst].rq_iov = &vars->close_iov[0]; rqst[num_rqst].rq_nvec = 1; rc = SMB2_close_init(tcon, server, diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 60d4bd1eae2b..d9073b569e17 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -844,14 +844,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, } int -smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server) +smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server) { - struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer; - struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer; + struct smb2_sync_hdr *sync_hdr = mid->resp_buf; + struct smb2_create_rsp *rsp = mid->resp_buf; struct cifs_tcon *tcon; int rc; - if (sync_hdr->Command != SMB2_CREATE || + if ((mid->optype & CIFS_CP_CREATE_CLOSE_OP) || sync_hdr->Command != SMB2_CREATE || sync_hdr->Status != STATUS_SUCCESS) return 0; diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index f19274857292..7b614a7096cd 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -1164,7 +1164,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon, struct TCP_Server_Info *server = cifs_pick_channel(ses); __le16 *utf16_path = NULL; int ea_name_len = strlen(ea_name); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; int len; struct smb_rqst rqst[3]; int resp_buftype[3]; @@ -1542,7 +1542,7 @@ smb2_ioctl_query_info(const unsigned int xid, struct smb_query_info qi; struct smb_query_info __user *pqi; int rc = 0; - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb2_query_info_rsp *qi_rsp = NULL; struct smb2_ioctl_rsp *io_rsp = NULL; void *buffer = NULL; @@ -2007,6 +2007,7 @@ smb2_duplicate_extents(const unsigned int xid, { int rc; unsigned int ret_data_len; + struct inode *inode; struct duplicate_extents_to_file dup_ext_buf; struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink); @@ -2023,10 +2024,21 @@ smb2_duplicate_extents(const unsigned int xid, cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n", src_off, dest_off, len); - rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); - if (rc) - goto duplicate_extents_out; + inode = d_inode(trgtfile->dentry); + if (inode->i_size < dest_off + len) { + rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); + if (rc) + goto duplicate_extents_out; + /* + * Although also could set plausible allocation size (i_blocks) + * here in addition to setting the file size, in reflink + * it is likely that the target file is sparse. Its allocation + * size will be queried on next revalidate, but it is important + * to make sure that file's cached size is updated immediately + */ + cifs_setsize(inode, dest_off + len); + } rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, @@ -2516,7 +2528,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon, { struct cifs_ses *ses = tcon->ses; struct TCP_Server_Info *server = cifs_pick_channel(ses); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; @@ -2914,7 +2926,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, unsigned int sub_offset; unsigned int print_len; unsigned int print_offset; - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; @@ -3096,7 +3108,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_open_parms oparms; struct cifs_fid fid; struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses); - int flags = 0; + int flags = CIFS_CP_CREATE_CLOSE_OP; struct smb_rqst rqst[3]; int resp_buftype[3]; struct kvec rsp_iov[3]; @@ -4097,7 +4109,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) if (ses->Suid == ses_id) { ses_enc_key = enc ? ses->smb3encryptionkey : ses->smb3decryptionkey; - memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE); + memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); spin_unlock(&cifs_tcp_ses_lock); return 0; } @@ -4124,7 +4136,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, int rc = 0; struct scatterlist *sg; u8 sign[SMB2_SIGNATURE_SIZE] = {}; - u8 key[SMB3_SIGN_KEY_SIZE]; + u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; char *iv; unsigned int iv_len; @@ -4148,10 +4160,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, tfm = enc ? server->secmech.ccmaesencrypt : server->secmech.ccmaesdecrypt; - if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM) + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE); else - rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE); + rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE); if (rc) { cifs_server_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 794fc3b68b4f..6a1af5545f67 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -4033,8 +4033,7 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = - cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); + shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (rc) @@ -4340,8 +4339,7 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = - cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1); + shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); rc = adjust_credits(server, &wdata->credits, wdata->bytes); if (rc) diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h index 9565e27681a5..a2eb34a8d9c9 100644 --- a/fs/cifs/smb2proto.h +++ b/fs/cifs/smb2proto.h @@ -246,8 +246,7 @@ extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon, extern int smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid); -extern int smb2_handle_cancelled_mid(char *buffer, - struct TCP_Server_Info *server); +extern int smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server); void smb2_cancelled_close_fid(struct work_struct *work); extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_file_id, u64 volatile_file_id, diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index ebccd71cc60a..e6fa76ab70be 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -298,7 +298,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; - __u8 L[4] = {0, 0, 0, 128}; + __u8 L128[4] = {0, 0, 0, 128}; + __u8 L256[4] = {0, 0, 1, 0}; int rc = 0; unsigned char prfhash[SMB2_HMACSHA256_SIZE]; unsigned char *hashptr = prfhash; @@ -354,8 +355,14 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, - L, 4); + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + L256, 4); + } else { + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + L128, 4); + } if (rc) { cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__); goto smb3signkey_ret; @@ -390,6 +397,9 @@ generate_smb3signingkey(struct cifs_ses *ses, const struct derivation_triplet *ptriplet) { int rc; +#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS + struct TCP_Server_Info *server = ses->server; +#endif /* * All channels use the same encryption/decryption keys but @@ -422,11 +432,11 @@ generate_smb3signingkey(struct cifs_ses *ses, rc = generate_key(ses, ptriplet->encryption.label, ptriplet->encryption.context, ses->smb3encryptionkey, - SMB3_SIGN_KEY_SIZE); + SMB3_ENC_DEC_KEY_SIZE); rc = generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, ses->smb3decryptionkey, - SMB3_SIGN_KEY_SIZE); + SMB3_ENC_DEC_KEY_SIZE); if (rc) return rc; } @@ -442,14 +452,23 @@ generate_smb3signingkey(struct cifs_ses *ses, */ cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid), &ses->Suid); + cifs_dbg(VFS, "Cipher type %d\n", server->cipher_type); cifs_dbg(VFS, "Session Key %*ph\n", SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); cifs_dbg(VFS, "Signing Key %*ph\n", SMB3_SIGN_KEY_SIZE, ses->smb3signingkey); - cifs_dbg(VFS, "ServerIn Key %*ph\n", - SMB3_SIGN_KEY_SIZE, ses->smb3encryptionkey); - cifs_dbg(VFS, "ServerOut Key %*ph\n", - SMB3_SIGN_KEY_SIZE, ses->smb3decryptionkey); + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { + cifs_dbg(VFS, "ServerIn Key %*ph\n", + SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3encryptionkey); + cifs_dbg(VFS, "ServerOut Key %*ph\n", + SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3decryptionkey); + } else { + cifs_dbg(VFS, "ServerIn Key %*ph\n", + SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3encryptionkey); + cifs_dbg(VFS, "ServerOut Key %*ph\n", + SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3decryptionkey); + } #endif return rc; } diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 4a2b836eb017..13d685f0ac8e 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c @@ -101,7 +101,7 @@ static void _cifs_mid_q_entry_release(struct kref *refcount) if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) && midEntry->mid_state == MID_RESPONSE_RECEIVED && server->ops->handle_cancelled_mid) - server->ops->handle_cancelled_mid(midEntry->resp_buf, server); + server->ops->handle_cancelled_mid(midEntry, server); midEntry->mid_state = MID_FREE; atomic_dec(&midCount); @@ -1171,9 +1171,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, /* * Compounding is never used during session establish. */ - if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) + if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) { + mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, rqst[0].rq_iov, rqst[0].rq_nvec); + mutex_unlock(&server->srv_mutex); + } for (i = 0; i < num_rqst; i++) { rc = wait_for_response(server, midQ[i]); @@ -1182,7 +1185,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, } if (rc != 0) { for (; i < num_rqst; i++) { - cifs_server_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n", + cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n", midQ[i]->mid, le16_to_cpu(midQ[i]->command)); send_cancel(server, &rqst[i], midQ[i]); spin_lock(&GlobalMid_Lock); @@ -1241,7 +1244,9 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, .iov_base = resp_iov[0].iov_base, .iov_len = resp_iov[0].iov_len }; + mutex_lock(&server->srv_mutex); smb311_update_preauth_hash(ses, &iov, 1); + mutex_unlock(&server->srv_mutex); } out: diff --git a/fs/configfs/file.c b/fs/configfs/file.c index 1f0270229d7b..da8351d1e455 100644 --- a/fs/configfs/file.c +++ b/fs/configfs/file.c @@ -378,7 +378,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type attr = to_attr(dentry); if (!attr) - goto out_put_item; + goto out_free_buffer; if (type & CONFIGFS_ITEM_BIN_ATTR) { buffer->bin_attr = to_bin_attr(dentry); @@ -391,7 +391,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type /* Grab the module reference for this attribute if we have one */ error = -ENODEV; if (!try_module_get(buffer->owner)) - goto out_put_item; + goto out_free_buffer; error = -EACCES; if (!buffer->item->ci_type) @@ -435,8 +435,6 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type out_put_module: module_put(buffer->owner); -out_put_item: - config_item_put(buffer->item); out_free_buffer: up_read(&frag->frag_sem); kfree(buffer); diff --git a/fs/dax.c b/fs/dax.c index 26d5dcd2d69e..b3d27fdc6775 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -810,11 +810,12 @@ static void dax_entry_mkclean(struct address_space *mapping, pgoff_t index, address = pgoff_address(index, vma); /* - * Note because we provide range to follow_pte it will call + * follow_invalidate_pte() will use the range to call * mmu_notifier_invalidate_range_start() on our behalf before * taking any lock. */ - if (follow_pte(vma->vm_mm, address, &range, &ptep, &pmdp, &ptl)) + if (follow_invalidate_pte(vma->vm_mm, address, &range, &ptep, + &pmdp, &ptl)) continue; /* diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 2fcf66473436..86c7f0489620 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -297,7 +297,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent) { struct dentry *dentry; - if (IS_ERR(parent)) + if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent)) return NULL; if (!parent) @@ -318,6 +318,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent) if (!(debugfs_allow & DEBUGFS_ALLOW_API)) return ERR_PTR(-EPERM); + if (!debugfs_initialized()) + return ERR_PTR(-ENOENT); + pr_debug("creating file '%s'\n", name); if (IS_ERR(parent)) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index be10b16ea66e..d5a6b9b888a5 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -158,8 +158,8 @@ static int erofs_read_superblock(struct super_block *sb) blkszbits = dsb->blkszbits; /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ if (blkszbits != LOG_BLOCK_SIZE) { - erofs_err(sb, "blksize %u isn't supported on this platform", - 1 << blkszbits); + erofs_err(sb, "blkszbits %u isn't supported on this platform", + blkszbits); goto out; } diff --git a/fs/erofs/xattr.c b/fs/erofs/xattr.c index 5bde77d70852..47314a26767a 100644 --- a/fs/erofs/xattr.c +++ b/fs/erofs/xattr.c @@ -48,8 +48,14 @@ static int init_inode_xattrs(struct inode *inode) int ret = 0; /* the most case is that xattrs of this inode are initialized. */ - if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) + if (test_bit(EROFS_I_EA_INITED_BIT, &vi->flags)) { + /* + * paired with smp_mb() at the end of the function to ensure + * fields will only be observed after the bit is set. + */ + smp_mb(); return 0; + } if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_XATTR_BIT, TASK_KILLABLE)) return -ERESTARTSYS; @@ -137,6 +143,8 @@ static int init_inode_xattrs(struct inode *inode) } xattr_iter_end(&it, atomic_map); + /* paired with smp_mb() at the beginning of the function. */ + smp_mb(); set_bit(EROFS_I_EA_INITED_BIT, &vi->flags); out_unlock: diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c index ae325541884e..14d2de35110c 100644 --- a/fs/erofs/zmap.c +++ b/fs/erofs/zmap.c @@ -36,8 +36,14 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) void *kaddr; struct z_erofs_map_header *h; - if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) + if (test_bit(EROFS_I_Z_INITED_BIT, &vi->flags)) { + /* + * paired with smp_mb() at the end of the function to ensure + * fields will only be observed after the bit is set. + */ + smp_mb(); return 0; + } if (wait_on_bit_lock(&vi->flags, EROFS_I_BL_Z_BIT, TASK_KILLABLE)) return -ERESTARTSYS; @@ -83,6 +89,8 @@ static int z_erofs_fill_inode_lazy(struct inode *inode) vi->z_physical_clusterbits[1] = vi->z_logical_clusterbits + ((h->h_clusterbits >> 5) & 7); + /* paired with smp_mb() at the beginning of the function */ + smp_mb(); set_bit(EROFS_I_Z_INITED_BIT, &vi->flags); unmap_done: kunmap_atomic(kaddr); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index a829af074eb5..3196474cbe24 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -979,7 +979,7 @@ static struct epitem *ep_find(struct eventpoll *ep, struct file *file, int fd) return epir; } -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_KCMP static struct epitem *ep_find_tfd(struct eventpoll *ep, int tfd, unsigned long toff) { struct rb_node *rbp; @@ -1021,7 +1021,7 @@ struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, return file_raw; } -#endif /* CONFIG_CHECKPOINT_RESTORE */ +#endif /* CONFIG_KCMP */ /** * Adds a new entry to the tail of the list in a lockless way, i.e. diff --git a/fs/exfat/exfat_raw.h b/fs/exfat/exfat_raw.h index 6aec6288e1f2..7f39b1c6469c 100644 --- a/fs/exfat/exfat_raw.h +++ b/fs/exfat/exfat_raw.h @@ -77,6 +77,10 @@ #define EXFAT_FILE_NAME_LEN 15 +#define EXFAT_MIN_SECT_SIZE_BITS 9 +#define EXFAT_MAX_SECT_SIZE_BITS 12 +#define EXFAT_MAX_SECT_PER_CLUS_BITS(x) (25 - (x)->sect_size_bits) + /* EXFAT: Main and Backup Boot Sector (512 bytes) */ struct boot_sector { __u8 jmp_boot[BOOTSEC_JUMP_BOOT_LEN]; diff --git a/fs/exfat/super.c b/fs/exfat/super.c index 87be5bfc31eb..c6d8d2e53486 100644 --- a/fs/exfat/super.c +++ b/fs/exfat/super.c @@ -381,8 +381,7 @@ static int exfat_calibrate_blocksize(struct super_block *sb, int logical_sect) { struct exfat_sb_info *sbi = EXFAT_SB(sb); - if (!is_power_of_2(logical_sect) || - logical_sect < 512 || logical_sect > 4096) { + if (!is_power_of_2(logical_sect)) { exfat_err(sb, "bogus logical sector size %u", logical_sect); return -EIO; } @@ -451,6 +450,25 @@ static int exfat_read_boot_sector(struct super_block *sb) return -EINVAL; } + /* + * sect_size_bits could be at least 9 and at most 12. + */ + if (p_boot->sect_size_bits < EXFAT_MIN_SECT_SIZE_BITS || + p_boot->sect_size_bits > EXFAT_MAX_SECT_SIZE_BITS) { + exfat_err(sb, "bogus sector size bits : %u\n", + p_boot->sect_size_bits); + return -EINVAL; + } + + /* + * sect_per_clus_bits could be at least 0 and at most 25 - sect_size_bits. + */ + if (p_boot->sect_per_clus_bits > EXFAT_MAX_SECT_PER_CLUS_BITS(p_boot)) { + exfat_err(sb, "bogus sectors bits per cluster : %u\n", + p_boot->sect_per_clus_bits); + return -EINVAL; + } + sbi->sect_per_clus = 1 << p_boot->sect_per_clus_bits; sbi->sect_per_clus_bits = p_boot->sect_per_clus_bits; sbi->cluster_size_bits = p_boot->sect_per_clus_bits + @@ -477,16 +495,19 @@ static int exfat_read_boot_sector(struct super_block *sb) sbi->used_clusters = EXFAT_CLUSTERS_UNTRACKED; /* check consistencies */ - if (sbi->num_FAT_sectors << p_boot->sect_size_bits < - sbi->num_clusters * 4) { + if ((u64)sbi->num_FAT_sectors << p_boot->sect_size_bits < + (u64)sbi->num_clusters * 4) { exfat_err(sb, "bogus fat length"); return -EINVAL; } + if (sbi->data_start_sector < - sbi->FAT1_start_sector + sbi->num_FAT_sectors * p_boot->num_fats) { + (u64)sbi->FAT1_start_sector + + (u64)sbi->num_FAT_sectors * p_boot->num_fats) { exfat_err(sb, "bogus data start sector"); return -EINVAL; } + if (sbi->vol_flags & VOLUME_DIRTY) exfat_warn(sb, "Volume was not properly unmounted. Some data may be corrupt. Please run fsck."); if (sbi->vol_flags & MEDIA_FAILURE) diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index 619dd35ddd48..86699c8cab28 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -103,8 +103,7 @@ config EXT4_DEBUG config EXT4_KUNIT_TESTS tristate "KUnit tests for ext4" if !KUNIT_ALL_TESTS - select EXT4_FS - depends on KUNIT + depends on EXT4_FS && KUNIT default KUNIT_ALL_TESTS help This builds the ext4 KUnit tests. diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2866d249f3d2..e5c81593d972 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2792,6 +2792,8 @@ void __ext4_fc_track_link(handle_t *handle, struct inode *inode, struct dentry *dentry); void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry); void ext4_fc_track_link(handle_t *handle, struct dentry *dentry); +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry); void ext4_fc_track_create(handle_t *handle, struct dentry *dentry); void ext4_fc_track_inode(handle_t *handle, struct inode *inode); void ext4_fc_mark_ineligible(struct super_block *sb, int reason); diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 0a14a7c87bf8..62e9e5535fa7 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -513,10 +513,10 @@ void ext4_fc_track_link(handle_t *handle, struct dentry *dentry) __ext4_fc_track_link(handle, d_inode(dentry), dentry); } -void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +void __ext4_fc_track_create(handle_t *handle, struct inode *inode, + struct dentry *dentry) { struct __track_dentry_update_args args; - struct inode *inode = d_inode(dentry); int ret; args.dentry = dentry; @@ -527,6 +527,11 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) trace_ext4_fc_track_create(inode, dentry, ret); } +void ext4_fc_track_create(handle_t *handle, struct dentry *dentry) +{ + __ext4_fc_track_create(handle, d_inode(dentry), dentry); +} + /* __track_fn for inode tracking */ static int __track_inode(struct inode *inode, void *arg, bool update) { diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c173c8405856..ed498538a749 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5029,7 +5029,7 @@ static int ext4_do_update_inode(handle_t *handle, struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; struct super_block *sb = inode->i_sb; - int err = 0, rc, block; + int err = 0, block; int need_datasync = 0, set_large_file = 0; uid_t i_uid; gid_t i_gid; @@ -5141,9 +5141,9 @@ static int ext4_do_update_inode(handle_t *handle, bh->b_data); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); - rc = ext4_handle_dirty_metadata(handle, NULL, bh); - if (!err) - err = rc; + err = ext4_handle_dirty_metadata(handle, NULL, bh); + if (err) + goto out_brelse; ext4_clear_inode_state(inode, EXT4_STATE_NEW); if (set_large_file) { BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access"); @@ -5389,8 +5389,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) inode->i_gid = attr->ia_gid; error = ext4_mark_inode_dirty(handle, inode); ext4_journal_stop(handle); - if (unlikely(error)) + if (unlikely(error)) { + ext4_fc_stop_update(inode); return error; + } } if (attr->ia_valid & ATTR_SIZE) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 99bf091fee10..a02fadf4fc84 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2709,8 +2709,15 @@ static int ext4_mb_init_backend(struct super_block *sb) } if (ext4_has_feature_flex_bg(sb)) { - /* a single flex group is supposed to be read by a single IO */ - sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex, + /* a single flex group is supposed to be read by a single IO. + * 2 ^ s_log_groups_per_flex != UINT_MAX as s_mb_prefetch is + * unsigned integer, so the maximum shift is 32. + */ + if (sbi->s_es->s_log_groups_per_flex >= 32) { + ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group"); + goto err_freesgi; + } + sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex, BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9)); sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */ } else { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index cf652ba3e74d..078f26f4b56e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2401,11 +2401,10 @@ static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname, (frame - 1)->bh); if (err) goto journal_error; - if (restart) { - err = ext4_handle_dirty_dx_node(handle, dir, - frame->bh); + err = ext4_handle_dirty_dx_node(handle, dir, + frame->bh); + if (err) goto journal_error; - } } else { struct dx_root *dxroot; memcpy((char *) entries2, (char *) entries, @@ -3602,6 +3601,31 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent, return retval; } +static void ext4_resetent(handle_t *handle, struct ext4_renament *ent, + unsigned ino, unsigned file_type) +{ + struct ext4_renament old = *ent; + int retval = 0; + + /* + * old->de could have moved from under us during make indexed dir, + * so the old->de may no longer valid and need to find it again + * before reset old inode info. + */ + old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); + if (IS_ERR(old.bh)) + retval = PTR_ERR(old.bh); + if (!old.bh) + retval = -ENOENT; + if (retval) { + ext4_std_error(old.dir->i_sb, retval); + return; + } + + ext4_setent(handle, &old, ino, file_type); + brelse(old.bh); +} + static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, const struct qstr *d_name) { @@ -3837,6 +3861,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, retval = ext4_mark_inode_dirty(handle, whiteout); if (unlikely(retval)) goto end_rename; + } if (!new.bh) { retval = ext4_add_entry(handle, new.dentry, old.inode); @@ -3910,6 +3935,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, ext4_fc_track_unlink(handle, new.dentry); __ext4_fc_track_link(handle, old.inode, new.dentry); __ext4_fc_track_unlink(handle, old.inode, old.dentry); + if (whiteout) + __ext4_fc_track_create(handle, whiteout, old.dentry); } if (new.inode) { @@ -3924,8 +3951,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, end_rename: if (whiteout) { if (retval) { - ext4_setent(handle, &old, - old.inode->i_ino, old_file_type); + ext4_resetent(handle, &old, + old.inode->i_ino, old_file_type); drop_nlink(whiteout); } unlock_new_inode(whiteout); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a6f9875aa34..a1353b0825ea 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -4875,7 +4875,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) set_task_ioprio(sbi->s_journal->j_task, journal_ioprio); - sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; sbi->s_journal->j_submit_inode_data_buffers = ext4_journal_submit_inode_data_buffers; sbi->s_journal->j_finish_inode_data_buffers = @@ -4987,6 +4986,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount5; } + /* + * We can only set up the journal commit callback once + * mballoc is initialized + */ + if (sbi->s_journal) + sbi->s_journal->j_commit_callback = + ext4_journal_commit_callback; + block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); @@ -5142,8 +5149,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) failed_mount3a: ext4_es_unregister_shrinker(sbi); failed_mount3: - del_timer_sync(&sbi->s_err_report); flush_work(&sbi->s_error_work); + del_timer_sync(&sbi->s_err_report); if (sbi->s_mmp_tsk) kthread_stop(sbi->s_mmp_tsk); failed_mount2: diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index 5b7ba8f71153..00e3cbde472e 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -201,55 +201,76 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc, struct inode *inode = file_inode(filp); const int credits = 2; /* superblock and inode for ext4_orphan_del() */ handle_t *handle; + struct ext4_iloc iloc; int err = 0; - int err2; - if (desc != NULL) { - /* Succeeded; write the verity descriptor. */ - err = ext4_write_verity_descriptor(inode, desc, desc_size, - merkle_tree_size); - - /* Write all pages before clearing VERITY_IN_PROGRESS. */ - if (!err) - err = filemap_write_and_wait(inode->i_mapping); - } + /* + * If an error already occurred (which fs/verity/ signals by passing + * desc == NULL), then only clean-up is needed. + */ + if (desc == NULL) + goto cleanup; - /* If we failed, truncate anything we wrote past i_size. */ - if (desc == NULL || err) - ext4_truncate(inode); + /* Append the verity descriptor. */ + err = ext4_write_verity_descriptor(inode, desc, desc_size, + merkle_tree_size); + if (err) + goto cleanup; /* - * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and - * deleting the inode from the orphan list, even if something failed. - * If everything succeeded, we'll also set the verity bit in the same - * transaction. + * Write all pages (both data and verity metadata). Note that this must + * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages + * beyond i_size won't be written properly. For crash consistency, this + * also must happen before the verity inode flag gets persisted. */ + err = filemap_write_and_wait(inode->i_mapping); + if (err) + goto cleanup; - ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + /* + * Finally, set the verity inode flag and remove the inode from the + * orphan list (in a single transaction). + */ handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); - return PTR_ERR(handle); + err = PTR_ERR(handle); + goto cleanup; } - err2 = ext4_orphan_del(handle, inode); - if (err2) - goto out_stop; + err = ext4_orphan_del(handle, inode); + if (err) + goto stop_and_cleanup; - if (desc != NULL && !err) { - struct ext4_iloc iloc; + err = ext4_reserve_inode_write(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; - err = ext4_reserve_inode_write(handle, inode, &iloc); - if (err) - goto out_stop; - ext4_set_inode_flag(inode, EXT4_INODE_VERITY); - ext4_set_inode_flags(inode, false); - err = ext4_mark_iloc_dirty(handle, inode, &iloc); - } -out_stop: + ext4_set_inode_flag(inode, EXT4_INODE_VERITY); + ext4_set_inode_flags(inode, false); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); + if (err) + goto stop_and_cleanup; + + ext4_journal_stop(handle); + + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return 0; + +stop_and_cleanup: ext4_journal_stop(handle); - return err ?: err2; +cleanup: + /* + * Verity failed to be enabled, so clean up by truncating any verity + * metadata that was written beyond i_size (both from cache and from + * disk), removing the inode from the orphan list (if it wasn't done + * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS. + */ + truncate_inode_pages(inode->i_mapping, inode->i_size); + ext4_truncate(inode); + ext4_orphan_del(NULL, inode); + ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); + return err; } static int ext4_get_verity_descriptor_location(struct inode *inode, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 372208500f4e..6c1018223c54 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1462,6 +1462,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value, if (!ce) return NULL; + WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) && + !(current->flags & PF_MEMALLOC_NOFS)); + ea_data = kvmalloc(value_len, GFP_KERNEL); if (!ea_data) { mb_cache_entry_put(ea_inode_cache, ce); @@ -2327,6 +2330,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, error = -ENOSPC; goto cleanup; } + WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS)); } error = ext4_reserve_inode_write(handle, inode, &is.iloc); @@ -2400,7 +2404,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, * external inode if possible. */ if (ext4_has_feature_ea_inode(inode->i_sb) && - !i.in_inode) { + i.value_len && !i.in_inode) { i.in_inode = 1; goto retry_inode; } diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 4bcbacfe3325..8d044de865d8 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -321,22 +321,22 @@ static const struct f2fs_compress_ops f2fs_lz4_ops = { static int zstd_init_compress_ctx(struct compress_ctx *cc) { - ZSTD_parameters params; - ZSTD_CStream *stream; + zstd_parameters params; + zstd_cstream *stream; void *workspace; unsigned int workspace_size; - params = ZSTD_getParams(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen, 0); - workspace_size = ZSTD_CStreamWorkspaceBound(params.cParams); + params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen); + workspace_size = zstd_cstream_workspace_bound(¶ms.cParams); workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode), workspace_size, GFP_NOFS); if (!workspace) return -ENOMEM; - stream = ZSTD_initCStream(params, 0, workspace, workspace_size); + stream = zstd_init_cstream(¶ms, 0, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initCStream failed\n", + printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_cstream failed\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, __func__); kvfree(workspace); @@ -359,9 +359,9 @@ static void zstd_destroy_compress_ctx(struct compress_ctx *cc) static int zstd_compress_pages(struct compress_ctx *cc) { - ZSTD_CStream *stream = cc->private2; - ZSTD_inBuffer inbuf; - ZSTD_outBuffer outbuf; + zstd_cstream *stream = cc->private2; + zstd_in_buffer inbuf; + zstd_out_buffer outbuf; int src_size = cc->rlen; int dst_size = src_size - PAGE_SIZE - COMPRESS_HEADER_SIZE; int ret; @@ -374,19 +374,19 @@ static int zstd_compress_pages(struct compress_ctx *cc) outbuf.dst = cc->cbuf->cdata; outbuf.size = dst_size; - ret = ZSTD_compressStream(stream, &outbuf, &inbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + ret = zstd_compress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_compress_stream failed, ret: %d\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } - ret = ZSTD_endStream(stream, &outbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_endStream returned %d\n", + ret = zstd_end_stream(stream, &outbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_end_stream returned %d\n", KERN_ERR, F2FS_I_SB(cc->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } @@ -403,22 +403,22 @@ static int zstd_compress_pages(struct compress_ctx *cc) static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic) { - ZSTD_DStream *stream; + zstd_dstream *stream; void *workspace; unsigned int workspace_size; unsigned int max_window_size = MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size); - workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size); + workspace_size = zstd_dstream_workspace_bound(max_window_size); workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode), workspace_size, GFP_NOFS); if (!workspace) return -ENOMEM; - stream = ZSTD_initDStream(max_window_size, workspace, workspace_size); + stream = zstd_init_dstream(max_window_size, workspace, workspace_size); if (!stream) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n", + printk_ratelimited("%sF2FS-fs (%s): %s zstd_init_dstream failed\n", KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, __func__); kvfree(workspace); @@ -440,9 +440,9 @@ static void zstd_destroy_decompress_ctx(struct decompress_io_ctx *dic) static int zstd_decompress_pages(struct decompress_io_ctx *dic) { - ZSTD_DStream *stream = dic->private2; - ZSTD_inBuffer inbuf; - ZSTD_outBuffer outbuf; + zstd_dstream *stream = dic->private2; + zstd_in_buffer inbuf; + zstd_out_buffer outbuf; int ret; inbuf.pos = 0; @@ -453,11 +453,11 @@ static int zstd_decompress_pages(struct decompress_io_ctx *dic) outbuf.dst = dic->rbuf; outbuf.size = dic->rlen; - ret = ZSTD_decompressStream(stream, &outbuf, &inbuf); - if (ZSTD_isError(ret)) { - printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_compressStream failed, ret: %d\n", + ret = zstd_decompress_stream(stream, &outbuf, &inbuf); + if (zstd_is_error(ret)) { + printk_ratelimited("%sF2FS-fs (%s): %s zstd_decompress_stream failed, ret: %d\n", KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id, - __func__, ZSTD_getErrorCode(ret)); + __func__, zstd_get_error_code(ret)); return -EIO; } @@ -1415,7 +1415,7 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, ret = f2fs_write_single_data_page(cc->rpages[i], &_submitted, NULL, NULL, wbc, io_type, - compr_blocks); + compr_blocks, false); if (ret) { if (ret == AOP_WRITEPAGE_ACTIVATE) { unlock_page(cc->rpages[i]); @@ -1450,6 +1450,9 @@ static int f2fs_write_raw_pages(struct compress_ctx *cc, *submitted += _submitted; } + + f2fs_balance_fs(F2FS_M_SB(mapping), true); + return 0; out_err: for (++i; i < cc->cluster_size; i++) { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index aa34d620bec9..4d3ebf094f6d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -499,7 +499,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi, if (f2fs_lfs_mode(sbi) && current->plug) blk_finish_plug(current->plug); - if (F2FS_IO_ALIGNED(sbi)) + if (!F2FS_IO_ALIGNED(sbi)) goto submit_io; start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS; @@ -2743,7 +2743,8 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, - int compr_blocks) + int compr_blocks, + bool allow_balance) { struct inode *inode = page->mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); @@ -2881,7 +2882,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, } unlock_page(page); if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && - !F2FS_I(inode)->cp_task) + !F2FS_I(inode)->cp_task && allow_balance) f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { @@ -2928,7 +2929,7 @@ static int f2fs_write_data_page(struct page *page, #endif return f2fs_write_single_data_page(page, NULL, NULL, NULL, - wbc, FS_DATA_IO, 0); + wbc, FS_DATA_IO, 0, true); } /* @@ -3096,7 +3097,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping, } #endif ret = f2fs_write_single_data_page(page, &submitted, - &bio, &last_block, wbc, io_type, 0); + &bio, &last_block, wbc, io_type, + 0, true); if (ret == AOP_WRITEPAGE_ACTIVATE) unlock_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index bb11759191dc..1578402c5844 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3469,7 +3469,7 @@ int f2fs_write_single_data_page(struct page *page, int *submitted, struct bio **bio, sector_t *last_block, struct writeback_control *wbc, enum iostat_type io_type, - int compr_blocks); + int compr_blocks, bool allow_balance); void f2fs_invalidate_page(struct page *page, unsigned int offset, unsigned int length); int f2fs_release_page(struct page *page, gfp_t wait); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f585545277d7..d5ebc67c7130 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -60,6 +60,9 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf) bool need_alloc = true; int err = 0; + if (unlikely(IS_IMMUTABLE(inode))) + return VM_FAULT_SIGBUS; + if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; goto err; @@ -767,6 +770,10 @@ int f2fs_truncate(struct inode *inode) return -EIO; } + err = dquot_initialize(inode); + if (err) + return err; + /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); @@ -848,7 +855,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + if (!in_group_p(inode->i_gid) && + !capable_wrt_inode_uidgid(inode, CAP_FSETID)) mode &= ~S_ISGID; set_acl_inode(inode, mode); } @@ -865,6 +873,14 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr) if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) return -EIO; + if (unlikely(IS_IMMUTABLE(inode))) + return -EPERM; + + if (unlikely(IS_APPEND(inode) && + (attr->ia_valid & (ATTR_MODE | ATTR_UID | + ATTR_GID | ATTR_TIMES_SET)))) + return -EPERM; + if ((attr->ia_valid & ATTR_SIZE) && !f2fs_is_compress_backend_ready(inode)) return -EOPNOTSUPP; @@ -4043,8 +4059,10 @@ static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len) for (i = 0; i < page_len; i++, redirty_idx++) { page = find_lock_page(mapping, redirty_idx); - if (!page) - ret = -ENOENT; + if (!page) { + ret = -ENOMEM; + break; + } set_page_dirty(page); f2fs_put_page(page, 1); f2fs_put_page(page, 0); @@ -4349,6 +4367,11 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } + if (unlikely(IS_IMMUTABLE(inode))) { + ret = -EPERM; + goto unlock; + } + ret = generic_write_checks(iocb, from); if (ret > 0) { bool preallocated = false; @@ -4413,6 +4436,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) if (ret > 0) f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret); } +unlock: inode_unlock(inode); out: trace_f2fs_file_write_iter(inode, iocb->ki_pos, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 806ebabf5870..993caefcd2bb 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -192,6 +192,10 @@ int f2fs_convert_inline_inode(struct inode *inode) f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb)) return 0; + err = dquot_initialize(inode); + if (err) + return err; + page = f2fs_grab_cache_page(inode->i_mapping, 0, false); if (!page) return -ENOMEM; diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 6edb1ab579a1..887804968576 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -855,7 +855,11 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, if (whiteout) { f2fs_i_links_write(inode, false); + + spin_lock(&inode->i_lock); inode->i_state |= I_LINKABLE; + spin_unlock(&inode->i_lock); + *whiteout = inode; } else { d_tmpfile(dentry, inode); @@ -1041,7 +1045,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, err = f2fs_add_link(old_dentry, whiteout); if (err) goto put_out_dir; + + spin_lock(&whiteout->i_lock); whiteout->i_state &= ~I_LINKABLE; + spin_unlock(&whiteout->i_lock); + iput(whiteout); } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index e81eb0748e2a..229814b4f4a6 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -101,11 +101,11 @@ static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi, #define BLKS_PER_SEC(sbi) \ ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) #define GET_SEC_FROM_SEG(sbi, segno) \ - ((segno) / (sbi)->segs_per_sec) + (((segno) == -1) ? -1: (segno) / (sbi)->segs_per_sec) #define GET_SEG_FROM_SEC(sbi, secno) \ ((secno) * (sbi)->segs_per_sec) #define GET_ZONE_FROM_SEC(sbi, secno) \ - ((secno) / (sbi)->secs_per_zone) + (((secno) == -1) ? -1: (secno) / (sbi)->secs_per_zone) #define GET_ZONE_FROM_SEG(sbi, segno) \ GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index b4a07fe62d1a..972736d71fa4 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1796,6 +1796,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi) static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi) { + /* we should flush all the data to keep data consistency */ + sync_inodes_sb(sbi->sb); + down_write(&sbi->gc_lock); f2fs_dirty_to_prefree(sbi); diff --git a/fs/file.c b/fs/file.c index dab120b71e44..f3a4bac2cbe9 100644 --- a/fs/file.c +++ b/fs/file.c @@ -22,6 +22,8 @@ #include #include +#include "internal.h" + unsigned int sysctl_nr_open __read_mostly = 1024*1024; unsigned int sysctl_nr_open_min = BITS_PER_LONG; /* our min() is unusable in constant expressions ;-/ */ @@ -732,36 +734,48 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) } /* - * variant of close_fd that gets a ref on the file for later fput. - * The caller must ensure that filp_close() called on the file, and then - * an fput(). + * See close_fd_get_file() below, this variant assumes current->files->file_lock + * is held. */ -int close_fd_get_file(unsigned int fd, struct file **res) +int __close_fd_get_file(unsigned int fd, struct file **res) { struct files_struct *files = current->files; struct file *file; struct fdtable *fdt; - spin_lock(&files->file_lock); fdt = files_fdtable(files); if (fd >= fdt->max_fds) - goto out_unlock; + goto out_err; file = fdt->fd[fd]; if (!file) - goto out_unlock; + goto out_err; rcu_assign_pointer(fdt->fd[fd], NULL); __put_unused_fd(files, fd); - spin_unlock(&files->file_lock); get_file(file); *res = file; return 0; - -out_unlock: - spin_unlock(&files->file_lock); +out_err: *res = NULL; return -ENOENT; } +/* + * variant of close_fd that gets a ref on the file for later fput. + * The caller must ensure that filp_close() called on the file, and then + * an fput(). + */ +int close_fd_get_file(unsigned int fd, struct file **res) +{ + struct files_struct *files = current->files; + int ret; + + spin_lock(&files->file_lock); + ret = __close_fd_get_file(fd, res); + spin_unlock(&files->file_lock); + + return ret; +} + void do_close_on_exec(struct files_struct *files) { unsigned i; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7c4b8cb93f9f..103dfc2fa62e 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc) static inline void fuse_make_bad(struct inode *inode) { + remove_inode_hash(inode); set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state); } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 62d9081d1e26..a1f9dde33058 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, gfs2_inplace_release(ip); + if (ip->i_qadata && ip->i_qadata->qa_qd_num) + gfs2_quota_unlock(ip); + if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. */ loff_t blockmask = i_blocksize(inode) - 1; @@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, } } - if (ip->i_qadata && ip->i_qadata->qa_qd_num) - gfs2_quota_unlock(ip); - if (unlikely(!written)) goto out_unlock; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 9f2b5609f225..153272f82984 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl) { struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; struct lm_lockstruct *ls = &sdp->sd_lockstruct; - int lvb_needs_unlock = 0; int error; if (gl->gl_lksb.sb_lkid == 0) { @@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_update_request_times(gl); - /* don't want to skip dlm_unlock writing the lvb when lock is ex */ - - if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE)) - lvb_needs_unlock = 1; + /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !lvb_needs_unlock) { + !gl->gl_lksb.sb_lvbptr) { gfs2_glock_free(gl); return; } diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 2e9314091c81..1955dea999f7 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -935,12 +935,16 @@ static void trans_drain(struct gfs2_trans *tr) while (!list_empty(head)) { bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); + if (!list_empty(&bd->bd_ail_st_list)) + gfs2_remove_from_ail(bd); kmem_cache_free(gfs2_bufdata_cachep, bd); } head = &tr->tr_databuf; while (!list_empty(head)) { bd = list_first_entry(head, struct gfs2_bufdata, bd_list); list_del_init(&bd->bd_list); + if (!list_empty(&bd->bd_ail_st_list)) + gfs2_remove_from_ail(bd); kmem_cache_free(gfs2_bufdata_cachep, bd); } } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 61fce59cb4d3..f2c6bbe5cdb8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1084,6 +1084,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) int silent = fc->sb_flags & SB_SILENT; struct gfs2_sbd *sdp; struct gfs2_holder mount_gh; + struct gfs2_holder freeze_gh; int error; sdp = init_sbd(sb); @@ -1195,25 +1196,18 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_per_node; } - if (sb_rdonly(sb)) { - struct gfs2_holder freeze_gh; + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + if (error) + goto fail_per_node; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, - &freeze_gh); - if (error) { - fs_err(sdp, "can't make FS RO: %d\n", error); - goto fail_per_node; - } - gfs2_glock_dq_uninit(&freeze_gh); - } else { + if (!sb_rdonly(sb)) error = gfs2_make_fs_rw(sdp); - if (error) { - fs_err(sdp, "can't make FS RW: %d\n", error); - goto fail_per_node; - } - } + gfs2_freeze_unlock(&freeze_gh); + if (error) { + fs_err(sdp, "can't make FS RW: %d\n", error); + goto fail_per_node; + } gfs2_glock_dq_uninit(&mount_gh); gfs2_online_uevent(sdp); return 0; @@ -1514,6 +1508,12 @@ static int gfs2_reconfigure(struct fs_context *fc) fc->sb_flags |= SB_RDONLY; if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) { + struct gfs2_holder freeze_gh; + + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); + if (error) + return -EINVAL; + if (fc->sb_flags & SB_RDONLY) { error = gfs2_make_fs_ro(sdp); if (error) @@ -1523,6 +1523,7 @@ static int gfs2_reconfigure(struct fs_context *fc) if (error) errorfc(fc, "unable to remount read-write"); } + gfs2_freeze_unlock(&freeze_gh); } sdp->sd_args = *newargs; diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index c26c68ebd29d..8f9c6480a5df 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -470,9 +470,7 @@ void gfs2_recover_func(struct work_struct *work) /* Acquire a shared hold on the freeze lock */ - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | LM_FLAG_PRIORITY | - GL_EXACT, &thaw_gh); + error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY); if (error) goto fail_gunlock_ji; @@ -514,15 +512,17 @@ void gfs2_recover_func(struct work_struct *work) error = foreach_descriptor(jd, head.lh_tail, head.lh_blkno, pass); lops_after_scan(jd, error, pass); - if (error) + if (error) { + up_read(&sdp->sd_log_flush_lock); goto fail_gunlock_thaw; + } } recover_local_statfs(jd, &head); clean_journal(jd, &head); up_read(&sdp->sd_log_flush_lock); - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_freeze_unlock(&thaw_gh); t_rep = ktime_get(); fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, " "jhead:%lldms, tlck:%lldms, replay:%lldms]\n", @@ -544,7 +544,7 @@ void gfs2_recover_func(struct work_struct *work) goto done; fail_gunlock_thaw: - gfs2_glock_dq_uninit(&thaw_gh); + gfs2_freeze_unlock(&thaw_gh); fail_gunlock_ji: if (jlocked) { gfs2_glock_dq_uninit(&ji_gh); diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 2f56acc41c04..754ea2a137b4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -165,7 +165,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) { struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode); struct gfs2_glock *j_gl = ip->i_gl; - struct gfs2_holder freeze_gh; struct gfs2_log_header_host head; int error; @@ -173,12 +172,6 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) if (error) return error; - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, - &freeze_gh); - if (error) - goto fail_threads; - j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); if (gfs2_withdrawn(sdp)) { error = -EIO; @@ -205,13 +198,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_glock_dq_uninit(&freeze_gh); - return 0; fail: - gfs2_glock_dq_uninit(&freeze_gh); -fail_threads: if (sdp->sd_quotad_process) kthread_stop(sdp->sd_quotad_process); sdp->sd_quotad_process = NULL; @@ -452,7 +441,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp) } if (error) - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(&sdp->sd_freeze_gh); out: while (!list_empty(&list)) { @@ -609,30 +598,9 @@ static void gfs2_dirty_inode(struct inode *inode, int flags) int gfs2_make_fs_ro(struct gfs2_sbd *sdp) { - struct gfs2_holder freeze_gh; int error = 0; int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - gfs2_holder_mark_uninitialized(&freeze_gh); - if (sdp->sd_freeze_gl && - !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { - if (!log_write_allowed) { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, LM_FLAG_TRY | - LM_FLAG_NOEXP | GL_EXACT, - &freeze_gh); - if (error == GLR_TRYFAILED) - error = 0; - } else { - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, - LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, - &freeze_gh); - if (error && !gfs2_withdrawn(sdp)) - return error; - } - } - gfs2_flush_delete_work(sdp); if (!log_write_allowed && current == sdp->sd_quotad_process) fs_warn(sdp, "The quotad daemon is withdrawing.\n"); @@ -661,9 +629,6 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp) atomic_read(&sdp->sd_reserving_log) == 0, HZ * 5); } - if (gfs2_holder_initialized(&freeze_gh)) - gfs2_glock_dq_uninit(&freeze_gh); - gfs2_quota_cleanup(sdp); if (!log_write_allowed) @@ -772,10 +737,8 @@ void gfs2_freeze_func(struct work_struct *work) struct super_block *sb = sdp->sd_vfs; atomic_inc(&sb->s_active); - error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, - LM_FLAG_NOEXP | GL_EXACT, &freeze_gh); + error = gfs2_freeze_lock(sdp, &freeze_gh, 0); if (error) { - fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error); gfs2_assert_withdraw(sdp, 0); } else { atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN); @@ -785,7 +748,7 @@ void gfs2_freeze_func(struct work_struct *work) error); gfs2_assert_withdraw(sdp, 0); } - gfs2_glock_dq_uninit(&freeze_gh); + gfs2_freeze_unlock(&freeze_gh); } deactivate_super(sb); clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags); @@ -853,7 +816,7 @@ static int gfs2_unfreeze(struct super_block *sb) return 0; } - gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); + gfs2_freeze_unlock(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_mutex); return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE); } diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 6d4bf7ea7b3b..7f850ff6a05d 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -134,6 +134,8 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl, bd->bd_bh = bh; bd->bd_gl = gl; INIT_LIST_HEAD(&bd->bd_list); + INIT_LIST_HEAD(&bd->bd_ail_st_list); + INIT_LIST_HEAD(&bd->bd_ail_gl_list); bh->b_private = bd; return bd; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index a374397f4273..dc4985429cf2 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -91,18 +91,50 @@ int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, return error; } +/** + * gfs2_freeze_lock - hold the freeze glock + * @sdp: the superblock + * @freeze_gh: pointer to the requested holder + * @caller_flags: any additional flags needed by the caller + */ +int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh, + int caller_flags) +{ + int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags; + int error; + + error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags, + freeze_gh); + if (error && error != GLR_TRYFAILED) + fs_err(sdp, "can't lock the freeze lock: %d\n", error); + return error; +} + +void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh) +{ + if (gfs2_holder_initialized(freeze_gh)) + gfs2_glock_dq_uninit(freeze_gh); +} + static void signal_our_withdraw(struct gfs2_sbd *sdp) { - struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl; - struct inode *inode = sdp->sd_jdesc->jd_inode; - struct gfs2_inode *ip = GFS2_I(inode); - u64 no_formal_ino = ip->i_no_formal_ino; + struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl; + struct inode *inode; + struct gfs2_inode *ip; + struct gfs2_glock *i_gl; + u64 no_formal_ino; + int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); int ret = 0; int tries; - if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc) return; + inode = sdp->sd_jdesc->jd_inode; + ip = GFS2_I(inode); + i_gl = ip->i_gl; + no_formal_ino = ip->i_no_formal_ino; + /* Prevent any glock dq until withdraw recovery is complete */ set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* @@ -117,8 +149,21 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * therefore we need to clear SDF_JOURNAL_LIVE manually. */ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); - if (!sb_rdonly(sdp->sd_vfs)) - ret = gfs2_make_fs_ro(sdp); + if (!sb_rdonly(sdp->sd_vfs)) { + struct gfs2_holder freeze_gh; + + gfs2_holder_mark_uninitialized(&freeze_gh); + if (sdp->sd_freeze_gl && + !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) { + ret = gfs2_freeze_lock(sdp, &freeze_gh, + log_write_allowed ? 0 : LM_FLAG_TRY); + if (ret == GLR_TRYFAILED) + ret = 0; + } + if (!ret) + ret = gfs2_make_fs_ro(sdp); + gfs2_freeze_unlock(&freeze_gh); + } if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ if (!ret) @@ -141,7 +186,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); thaw_super(sdp->sd_vfs); } else { - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE); + wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE, + TASK_UNINTERRUPTIBLE); } /* @@ -161,15 +207,15 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) * on other nodes to be successful, otherwise we remain the owner of * the glock as far as dlm is concerned. */ - if (gl->gl_ops->go_free) { - set_bit(GLF_FREEING, &gl->gl_flags); - wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); + if (i_gl->gl_ops->go_free) { + set_bit(GLF_FREEING, &i_gl->gl_flags); + wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); } /* * Dequeue the "live" glock, but keep a reference so it's never freed. */ - gfs2_glock_hold(gl); + gfs2_glock_hold(live_gl); gfs2_glock_dq_wait(&sdp->sd_live_gh); /* * We enqueue the "live" glock in EX so that all other nodes @@ -208,7 +254,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_glock_nq(&sdp->sd_live_gh); } - gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */ + gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); /* diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index a4443dd8a94b..69e1a0ae5a4d 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -149,6 +149,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); +extern int gfs2_freeze_lock(struct gfs2_sbd *sdp, + struct gfs2_holder *freeze_gh, int caller_flags); +extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh); #define gfs2_io_error(sdp) \ gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) diff --git a/fs/internal.h b/fs/internal.h index 77c50befbfbe..c6c85f6ad598 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -132,6 +132,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *, const char *, const struct open_flags *); extern struct open_how build_open_how(int flags, umode_t mode); extern int build_open_flags(const struct open_how *how, struct open_flags *op); +extern int __close_fd_get_file(unsigned int fd, struct file **res); long do_sys_ftruncate(unsigned int fd, loff_t length, int small); int chmod_common(const struct path *path, umode_t mode); diff --git a/fs/io-wq.c b/fs/io-wq.c index a564f36e260c..63ef195b1acb 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -555,23 +555,21 @@ static void io_worker_handle_work(struct io_worker *worker) /* handle a whole dependent link */ do { - struct io_wq_work *old_work, *next_hashed, *linked; + struct io_wq_work *next_hashed, *linked; unsigned int hash = io_get_work_hash(work); next_hashed = wq_next_work(work); io_impersonate_work(worker, work); + wq->do_work(work); + io_assign_current_work(worker, NULL); - old_work = work; - linked = wq->do_work(work); - + linked = wq->free_work(work); work = next_hashed; if (!work && linked && !io_wq_is_hashed(linked)) { work = linked; linked = NULL; } io_assign_current_work(worker, work); - wq->free_work(old_work); - if (linked) io_wqe_enqueue(wqe, linked); @@ -850,11 +848,9 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe) struct io_wq *wq = wqe->wq; do { - struct io_wq_work *old_work = work; - work->flags |= IO_WQ_WORK_CANCEL; - work = wq->do_work(work); - wq->free_work(old_work); + wq->do_work(work); + work = wq->free_work(work); } while (work); } @@ -944,7 +940,6 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data) */ spin_lock_irqsave(&worker->lock, flags); if (worker->cur_work && - !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) && match->fn(worker->cur_work, match->data)) { send_sig(SIGINT, worker->task, 1); match->nr_running++; diff --git a/fs/io-wq.h b/fs/io-wq.h index b158f8addcf3..e37a0f217cc8 100644 --- a/fs/io-wq.h +++ b/fs/io-wq.h @@ -9,7 +9,6 @@ enum { IO_WQ_WORK_CANCEL = 1, IO_WQ_WORK_HASHED = 2, IO_WQ_WORK_UNBOUND = 4, - IO_WQ_WORK_NO_CANCEL = 8, IO_WQ_WORK_CONCURRENT = 16, IO_WQ_WORK_FILES = 32, @@ -107,8 +106,8 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) return container_of(work->list.next, struct io_wq_work, list); } -typedef void (free_work_fn)(struct io_wq_work *); -typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *); +typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *); +typedef void (io_wq_work_fn)(struct io_wq_work *); struct io_wq_data { struct user_struct *user; diff --git a/fs/io_uring.c b/fs/io_uring.c index 931671082e61..5c4378694d54 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -411,7 +411,6 @@ struct io_poll_remove { struct io_close { struct file *file; - struct file *put_file; int fd; }; @@ -908,8 +907,6 @@ static const struct io_op_def io_op_defs[] = { IO_WQ_WORK_FS | IO_WQ_WORK_MM, }, [IORING_OP_CLOSE] = { - .needs_file = 1, - .needs_file_no_error = 1, .work_flags = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG, }, [IORING_OP_FILES_UPDATE] = { @@ -996,9 +993,9 @@ enum io_mem_account { ACCT_PINNED, }; -static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct task_struct *task); - +static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + struct files_struct *files); static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node); static struct fixed_file_ref_node *alloc_fixed_file_ref_node( struct io_ring_ctx *ctx); @@ -1826,18 +1823,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, return all_flushed; } -static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, +static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force, struct task_struct *tsk, struct files_struct *files) { + bool ret = true; + if (test_bit(0, &ctx->cq_check_overflow)) { /* iopoll syncs against uring_lock, not completion_lock */ if (ctx->flags & IORING_SETUP_IOPOLL) mutex_lock(&ctx->uring_lock); - __io_cqring_overflow_flush(ctx, force, tsk, files); + ret = __io_cqring_overflow_flush(ctx, force, tsk, files); if (ctx->flags & IORING_SETUP_IOPOLL) mutex_unlock(&ctx->uring_lock); } + + return ret; } static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) @@ -2172,6 +2173,16 @@ static int io_req_task_work_add(struct io_kiocb *req) return ret; } +static void io_req_task_work_add_fallback(struct io_kiocb *req, + void (*cb)(struct callback_head *)) +{ + struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq); + + init_task_work(&req->task_work, cb); + task_work_add(tsk, &req->task_work, TWA_NONE); + wake_up_process(tsk); +} + static void __io_req_task_cancel(struct io_kiocb *req, int error) { struct io_ring_ctx *ctx = req->ctx; @@ -2191,7 +2202,9 @@ static void io_req_task_cancel(struct callback_head *cb) struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); struct io_ring_ctx *ctx = req->ctx; + mutex_lock(&ctx->uring_lock); __io_req_task_cancel(req, -ECANCELED); + mutex_unlock(&ctx->uring_lock); percpu_ref_put(&ctx->refs); } @@ -2208,6 +2221,7 @@ static void __io_req_task_submit(struct io_kiocb *req) __io_req_task_cancel(req, -EFAULT); mutex_unlock(&ctx->uring_lock); + ctx->flags &= ~IORING_SETUP_R_DISABLED; if (ctx->flags & IORING_SETUP_SQPOLL) io_sq_thread_drop_mm_files(); } @@ -2229,14 +2243,8 @@ static void io_req_task_queue(struct io_kiocb *req) percpu_ref_get(&req->ctx->refs); ret = io_req_task_work_add(req); - if (unlikely(ret)) { - struct task_struct *tsk; - - init_task_work(&req->task_work, io_req_task_cancel); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } + if (unlikely(ret)) + io_req_task_work_add_fallback(req, io_req_task_cancel); } static inline void io_queue_next(struct io_kiocb *req) @@ -2354,13 +2362,8 @@ static void io_free_req_deferred(struct io_kiocb *req) init_task_work(&req->task_work, io_put_req_deferred_cb); ret = io_req_task_work_add(req); - if (unlikely(ret)) { - struct task_struct *tsk; - - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } + if (unlikely(ret)) + io_req_task_work_add_fallback(req, io_put_req_deferred_cb); } static inline void io_put_req_deferred(struct io_kiocb *req, int refs) @@ -2369,22 +2372,6 @@ static inline void io_put_req_deferred(struct io_kiocb *req, int refs) io_free_req_deferred(req); } -static struct io_wq_work *io_steal_work(struct io_kiocb *req) -{ - struct io_kiocb *nxt; - - /* - * A ref is owned by io-wq in which context we're. So, if that's the - * last one, it's safe to steal next work. False negatives are Ok, - * it just will be re-punted async in io_put_work() - */ - if (refcount_read(&req->refs) != 1) - return NULL; - - nxt = io_req_find_next(req); - return nxt ? &nxt->work : NULL; -} - static void io_double_put_req(struct io_kiocb *req) { /* drop both submit and complete references */ @@ -2735,6 +2722,13 @@ static bool io_rw_reissue(struct io_kiocb *req, long res) return false; if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) return false; + /* + * If ref is dying, we might be running poll reap from the exit work. + * Don't attempt to reissue from that path, just let it fail with + * -EAGAIN. + */ + if (percpu_ref_is_dying(&req->ctx->refs)) + return false; lockdep_assert_held(&req->ctx->uring_lock); @@ -3439,15 +3433,8 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode, /* submit ref gets dropped, acquire a new one */ refcount_inc(&req->refs); ret = io_req_task_work_add(req); - if (unlikely(ret)) { - struct task_struct *tsk; - - /* queue just for cancelation */ - init_task_work(&req->task_work, io_req_task_cancel); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); - } + if (unlikely(ret)) + io_req_task_work_add_fallback(req, io_req_task_cancel); return 1; } @@ -3532,7 +3519,6 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, else kiocb->ki_flags |= IOCB_NOWAIT; - /* If the file doesn't support async, just async punt */ no_async = force_nonblock && !io_file_supports_async(req->file, READ); if (no_async) @@ -3544,9 +3530,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = io_iter_do_read(req, iter); - if (!ret) { - goto done; - } else if (ret == -EIOCBQUEUED) { + if (ret == -EIOCBQUEUED) { ret = 0; goto out_free; } else if (ret == -EAGAIN) { @@ -3560,7 +3544,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; goto copy_iov; - } else if (ret < 0) { + } else if (ret <= 0) { /* make sure -ERESTARTSYS -> -EINTR is done */ goto done; } @@ -3604,6 +3588,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, goto out_free; } else if (ret > 0 && ret < io_size) { /* we got some bytes, but not all. retry. */ + kiocb->ki_flags &= ~IOCB_WAITQ; goto retry; } done: @@ -4229,6 +4214,7 @@ static int io_remove_buffers(struct io_kiocb *req, bool force_nonblock, static int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + unsigned long size; struct io_provide_buf *p = &req->pbuf; u64 tmp; @@ -4242,7 +4228,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req, p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); - if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) + size = (unsigned long)p->len * p->nbufs; + if (!access_ok(u64_to_user_ptr(p->addr), size)) return -EFAULT; p->bgid = READ_ONCE(sqe->buf_group); @@ -4481,13 +4468,6 @@ static int io_statx(struct io_kiocb *req, bool force_nonblock) static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { - /* - * If we queue this for async, it must not be cancellable. That would - * leave the 'file' in an undeterminate state, and here need to modify - * io_wq_work.flags, so initialize io_wq_work firstly. - */ - io_req_init_async(req); - if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->ioprio || sqe->off || sqe->addr || sqe->len || @@ -4497,43 +4477,59 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF; req->close.fd = READ_ONCE(sqe->fd); - if ((req->file && req->file->f_op == &io_uring_fops)) - return -EBADF; - - req->close.put_file = NULL; return 0; } static int io_close(struct io_kiocb *req, bool force_nonblock, struct io_comp_state *cs) { + struct files_struct *files = current->files; struct io_close *close = &req->close; + struct fdtable *fdt; + struct file *file; int ret; - /* might be already done during nonblock submission */ - if (!close->put_file) { - ret = close_fd_get_file(close->fd, &close->put_file); - if (ret < 0) - return (ret == -ENOENT) ? -EBADF : ret; + file = NULL; + ret = -EBADF; + spin_lock(&files->file_lock); + fdt = files_fdtable(files); + if (close->fd >= fdt->max_fds) { + spin_unlock(&files->file_lock); + goto err; + } + file = fdt->fd[close->fd]; + if (!file) { + spin_unlock(&files->file_lock); + goto err; + } + + if (file->f_op == &io_uring_fops) { + spin_unlock(&files->file_lock); + file = NULL; + goto err; } /* if the file has a flush method, be safe and punt to async */ - if (close->put_file->f_op->flush && force_nonblock) { - /* not safe to cancel at this point */ - req->work.flags |= IO_WQ_WORK_NO_CANCEL; - /* was never set, but play safe */ - req->flags &= ~REQ_F_NOWAIT; - /* avoid grabbing files - we don't need the files */ - req->flags |= REQ_F_NO_FILE_TABLE; + if (file->f_op->flush && force_nonblock) { + spin_unlock(&files->file_lock); return -EAGAIN; } + ret = __close_fd_get_file(close->fd, &file); + spin_unlock(&files->file_lock); + if (ret < 0) { + if (ret == -ENOENT) + ret = -EBADF; + goto err; + } + /* No ->flush() or already async, safely close from here */ - ret = filp_close(close->put_file, req->work.identity->files); + ret = filp_close(file, current->files); +err: if (ret < 0) req_set_fail_links(req); - fput(close->put_file); - close->put_file = NULL; + if (file) + fput(file); __io_req_complete(req, ret, 0, cs); return 0; } @@ -5159,12 +5155,8 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll, */ ret = io_req_task_work_add(req); if (unlikely(ret)) { - struct task_struct *tsk; - WRITE_ONCE(poll->canceled, true); - tsk = io_wq_get_task(req->ctx->io_wq); - task_work_add(tsk, &req->task_work, TWA_NONE); - wake_up_process(tsk); + io_req_task_work_add_fallback(req, func); } return 1; } @@ -5316,6 +5308,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt, pt->error = -EINVAL; return; } + /* double add on the same waitqueue head, ignore */ + if (poll->head == head) + return; poll = kmalloc(sizeof(*poll), GFP_ATOMIC); if (!poll) { pt->error = -ENOMEM; @@ -6381,7 +6376,7 @@ static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock, return 0; } -static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) +static void io_wq_submit_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); struct io_kiocb *timeout; @@ -6391,10 +6386,12 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) if (timeout) io_queue_linked_timeout(timeout); - /* if NO_CANCEL is set, we must still run the work */ - if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) == - IO_WQ_WORK_CANCEL) { - ret = -ECANCELED; + if (work->flags & IO_WQ_WORK_CANCEL) { + /* io-wq is going to take down one */ + refcount_inc(&req->refs); + percpu_ref_get(&req->ctx->refs); + io_req_task_work_add_fallback(req, io_req_task_cancel); + return; } if (!ret) { @@ -6435,8 +6432,6 @@ static struct io_wq_work *io_wq_submit_work(struct io_wq_work *work) if (lock_ctx) mutex_unlock(&lock_ctx->uring_lock); } - - return io_steal_work(req); } static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, @@ -6503,9 +6498,10 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) if (prev) { req_set_fail_links(prev); io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME); - io_put_req(prev); + io_put_req_deferred(prev, 1); } else { - io_req_complete(req, -ETIME); + io_cqring_add_event(req, -ETIME, 0); + io_put_req_deferred(req, 1); } return HRTIMER_NORESTART; } @@ -7217,6 +7213,25 @@ static int io_run_task_work_sig(void) return -EINTR; } +/* when returns >0, the caller should retry */ +static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx, + struct io_wait_queue *iowq, + signed long *timeout) +{ + int ret; + + /* make sure we run task_work before checking for signals */ + ret = io_run_task_work_sig(); + if (ret || io_should_wake(iowq)) + return ret; + /* let the caller flush overflows, retry */ + if (test_bit(0, &ctx->cq_check_overflow)) + return 1; + + *timeout = schedule_timeout(*timeout); + return !*timeout ? -ETIME : 1; +} + /* * Wait until events become available, if we don't already have some. The * application must reap them itself, as they reside on the shared cq ring. @@ -7235,9 +7250,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, .to_wait = min_events, }; struct io_rings *rings = ctx->rings; - struct timespec64 ts; - signed long timeout = 0; - int ret = 0; + signed long timeout = MAX_SCHEDULE_TIMEOUT; + int ret; do { io_cqring_overflow_flush(ctx, false, NULL, NULL); @@ -7261,6 +7275,8 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, } if (uts) { + struct timespec64 ts; + if (get_timespec64(&ts, uts)) return -EFAULT; timeout = timespec64_to_jiffies(&ts); @@ -7269,34 +7285,17 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts); trace_io_uring_cqring_wait(ctx, min_events); do { - io_cqring_overflow_flush(ctx, false, NULL, NULL); - prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, - TASK_INTERRUPTIBLE); - /* make sure we run task_work before checking for signals */ - ret = io_run_task_work_sig(); - if (ret > 0) { - finish_wait(&ctx->wait, &iowq.wq); - continue; - } - else if (ret < 0) - break; - if (io_should_wake(&iowq)) + /* if we can't even flush overflow, don't wait for more */ + if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) { + ret = -EBUSY; break; - if (test_bit(0, &ctx->cq_check_overflow)) { - finish_wait(&ctx->wait, &iowq.wq); - continue; - } - if (uts) { - timeout = schedule_timeout(timeout); - if (timeout == 0) { - ret = -ETIME; - break; - } - } else { - schedule(); } - } while (1); - finish_wait(&ctx->wait, &iowq.wq); + prepare_to_wait_exclusive(&ctx->wait, &iowq.wq, + TASK_INTERRUPTIBLE); + ret = io_cqring_wait_schedule(ctx, &iowq, &timeout); + finish_wait(&ctx->wait, &iowq.wq); + cond_resched(); + } while (ret > 0); restore_saved_sigmask_unless(ret == -EINTR); @@ -8067,12 +8066,12 @@ static int io_sqe_files_update(struct io_ring_ctx *ctx, void __user *arg, return __io_sqe_files_update(ctx, &up, nr_args); } -static void io_free_work(struct io_wq_work *work) +static struct io_wq_work *io_free_work(struct io_wq_work *work) { struct io_kiocb *req = container_of(work, struct io_kiocb, work); - /* Consider that io_steal_work() relies on this ref */ - io_put_req(req); + req = io_put_req_find_next(req); + return req ? &req->work : NULL; } static int io_init_wq_offload(struct io_ring_ctx *ctx, @@ -8723,8 +8722,21 @@ static __poll_t io_uring_poll(struct file *file, poll_table *wait) smp_rmb(); if (!io_sqring_full(ctx)) mask |= EPOLLOUT | EPOLLWRNORM; - io_cqring_overflow_flush(ctx, false, NULL, NULL); - if (io_cqring_events(ctx)) + + /* + * Don't flush cqring overflow list here, just do a simple check. + * Otherwise there could possible be ABBA deadlock: + * CPU0 CPU1 + * ---- ---- + * lock(&ctx->uring_lock); + * lock(&ep->mtx); + * lock(&ctx->uring_lock); + * lock(&ep->mtx); + * + * Users may get EPOLLIN meanwhile seeing nothing in cqring, this + * pushs them to do the flush. + */ + if (io_cqring_events(ctx) || test_bit(0, &ctx->cq_check_overflow)) mask |= EPOLLIN | EPOLLRDNORM; return mask; @@ -8763,7 +8775,7 @@ static void io_ring_exit_work(struct work_struct *work) * as nobody else will be looking for them. */ do { - __io_uring_cancel_task_requests(ctx, NULL); + io_uring_try_cancel_requests(ctx, NULL, NULL); } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20)); io_ring_ctx_free(ctx); } @@ -8851,11 +8863,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data) return ret; } -static void io_cancel_defer_files(struct io_ring_ctx *ctx, +static bool io_cancel_defer_files(struct io_ring_ctx *ctx, struct task_struct *task, struct files_struct *files) { - struct io_defer_entry *de = NULL; + struct io_defer_entry *de; LIST_HEAD(list); spin_lock_irq(&ctx->completion_lock); @@ -8866,6 +8878,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, } } spin_unlock_irq(&ctx->completion_lock); + if (list_empty(&list)) + return false; while (!list_empty(&list)) { de = list_first_entry(&list, struct io_defer_entry, list); @@ -8875,6 +8889,43 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx, io_req_complete(de->req, -ECANCELED); kfree(de); } + return true; +} + +static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx, + struct task_struct *task, + struct files_struct *files) +{ + struct io_task_cancel cancel = { .task = task, .files = files, }; + + while (1) { + enum io_wq_cancel cret; + bool ret = false; + + if (ctx->io_wq) { + cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, + &cancel, true); + ret |= (cret != IO_WQ_CANCEL_NOTFOUND); + } + + /* SQPOLL thread does its own polling */ + if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) || + (ctx->sq_data && ctx->sq_data->thread == current)) { + while (!list_empty_careful(&ctx->iopoll_list)) { + io_iopoll_try_reap_events(ctx); + ret = true; + } + } + + ret |= io_cancel_defer_files(ctx, task, files); + ret |= io_poll_remove_all(ctx, task, files); + ret |= io_kill_timeouts(ctx, task, files); + ret |= io_run_task_work(); + io_cqring_overflow_flush(ctx, true, task, files); + if (!ret) + break; + cond_resched(); + } } static int io_uring_count_inflight(struct io_ring_ctx *ctx, @@ -8896,7 +8947,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { while (!list_empty_careful(&ctx->inflight_list)) { - struct io_task_cancel cancel = { .task = task, .files = files }; DEFINE_WAIT(wait); int inflight; @@ -8904,49 +8954,17 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, if (!inflight) break; - io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true); - io_poll_remove_all(ctx, task, files); - io_kill_timeouts(ctx, task, files); - io_cqring_overflow_flush(ctx, true, task, files); - /* cancellations _may_ trigger task work */ - io_run_task_work(); + io_uring_try_cancel_requests(ctx, task, files); + if (ctx->sq_data) + io_sq_thread_unpark(ctx->sq_data); prepare_to_wait(&task->io_uring->wait, &wait, TASK_UNINTERRUPTIBLE); if (inflight == io_uring_count_inflight(ctx, task, files)) schedule(); finish_wait(&task->io_uring->wait, &wait); - } -} - -static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx, - struct task_struct *task) -{ - while (1) { - struct io_task_cancel cancel = { .task = task, .files = NULL, }; - enum io_wq_cancel cret; - bool ret = false; - - if (ctx->io_wq) { - cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, - &cancel, true); - ret |= (cret != IO_WQ_CANCEL_NOTFOUND); - } - - /* SQPOLL thread does its own polling */ - if (!(ctx->flags & IORING_SETUP_SQPOLL)) { - while (!list_empty_careful(&ctx->iopoll_list)) { - io_iopoll_try_reap_events(ctx); - ret = true; - } - } - - ret |= io_poll_remove_all(ctx, task, NULL); - ret |= io_kill_timeouts(ctx, task, NULL); - ret |= io_run_task_work(); - if (!ret) - break; - cond_resched(); + if (ctx->sq_data) + io_sq_thread_park(ctx->sq_data); } } @@ -8954,6 +8972,8 @@ static void io_disable_sqo_submit(struct io_ring_ctx *ctx) { mutex_lock(&ctx->uring_lock); ctx->sqo_dead = 1; + if (ctx->flags & IORING_SETUP_R_DISABLED) + io_sq_offload_start(ctx); mutex_unlock(&ctx->uring_lock); /* make sure callers enter the ring to get error */ @@ -8978,12 +8998,9 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx, io_sq_thread_park(ctx->sq_data); } - io_cancel_defer_files(ctx, task, files); - io_cqring_overflow_flush(ctx, true, task, files); - io_uring_cancel_files(ctx, task, files); if (!files) - __io_uring_cancel_task_requests(ctx, task); + io_uring_try_cancel_requests(ctx, task, NULL); if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) { atomic_dec(&task->io_uring->in_idle); @@ -9970,10 +9987,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx) if (ctx->restrictions.registered) ctx->restricted = 1; - ctx->flags &= ~IORING_SETUP_R_DISABLED; - io_sq_offload_start(ctx); - return 0; } diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index f0fe641893a5..b9e6a7ec78be 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -152,6 +152,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *file, printk(KERN_NOTICE "iso9660: Corrupted directory entry" " in block %lu of inode %lu\n", block, inode->i_ino); + brelse(bh); return -EIO; } diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index 402769881c32..58f80e1b3ac0 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c @@ -102,6 +102,7 @@ isofs_find_entry(struct inode *dir, struct dentry *dentry, printk(KERN_NOTICE "iso9660: Corrupted directory entry" " in block %lu of inode %lu\n", block, dir->i_ino); + brelse(bh); return 0; } diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c index be7c8a6a5748..4fe64519870f 100644 --- a/fs/jffs2/summary.c +++ b/fs/jffs2/summary.c @@ -783,6 +783,8 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock dbg_summary("Writing unknown RWCOMPAT_COPY node type %x\n", je16_to_cpu(temp->u.nodetype)); jffs2_sum_disable_collecting(c->summary); + /* The above call removes the list, nothing more to do */ + goto bail_rwcompat; } else { BUG(); /* unknown node in summary information */ } @@ -794,6 +796,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock c->summary->sum_num--; } + bail_rwcompat: jffs2_sum_reset_collected(c->summary); diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 94b7c1cb5ceb..7aee15608619 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -1656,7 +1656,7 @@ s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen) } else if (rc == -ENOSPC) { /* search for next smaller log2 block */ l2nb = BLKSTOL2(nblocks) - 1; - nblocks = 1 << l2nb; + nblocks = 1LL << l2nb; } else { /* Trim any already allocated blocks */ jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n"); diff --git a/fs/jfs/jfs_filsys.h b/fs/jfs/jfs_filsys.h index 1e899298f7f0..b5d702df7111 100644 --- a/fs/jfs/jfs_filsys.h +++ b/fs/jfs/jfs_filsys.h @@ -268,5 +268,6 @@ * fsck() must be run to repair */ #define FM_EXTENDFS 0x00000008 /* file system extendfs() in progress */ +#define FM_STATE_MAX 0x0000000f /* max value of s_state */ #endif /* _H_JFS_FILSYS */ diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 2935d4c776ec..5d7d7170c03c 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_filsys.h" @@ -366,6 +367,15 @@ static int chkSuper(struct super_block *sb) sbi->bsize = bsize; sbi->l2bsize = le16_to_cpu(j_sb->s_l2bsize); + /* check some fields for possible corruption */ + if (sbi->l2bsize != ilog2((u32)bsize) || + j_sb->pad != 0 || + le32_to_cpu(j_sb->s_state) > FM_STATE_MAX) { + rc = -EINVAL; + jfs_err("jfs_mount: Mount Failure: superblock is corrupt!"); + goto out; + } + /* * For now, ignore s_pbsize, l2bfactor. All I/O going through buffer * cache. diff --git a/fs/locks.c b/fs/locks.c index 99ca97e81b7a..6125d2de39b8 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1808,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags) if (flags & FL_LAYOUT) return 0; - if (flags & FL_DELEG) - /* We leave these checks to the caller. */ - return 0; if (arg == F_RDLCK) return inode_is_open_for_write(inode) ? -EAGAIN : 0; diff --git a/fs/namei.c b/fs/namei.c index 78443a85480a..dd85e12ac85a 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -669,17 +669,17 @@ static bool legitimize_root(struct nameidata *nd) */ /** - * unlazy_walk - try to switch to ref-walk mode. + * try_to_unlazy - try to switch to ref-walk mode. * @nd: nameidata pathwalk data - * Returns: 0 on success, -ECHILD on failure + * Returns: true on success, false on failure * - * unlazy_walk attempts to legitimize the current nd->path and nd->root + * try_to_unlazy attempts to legitimize the current nd->path and nd->root * for ref-walk mode. * Must be called from rcu-walk context. - * Nothing should touch nameidata between unlazy_walk() failure and + * Nothing should touch nameidata between try_to_unlazy() failure and * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd) +static bool try_to_unlazy(struct nameidata *nd) { struct dentry *parent = nd->path.dentry; @@ -694,14 +694,14 @@ static int unlazy_walk(struct nameidata *nd) goto out; rcu_read_unlock(); BUG_ON(nd->inode != parent->d_inode); - return 0; + return true; out1: nd->path.mnt = NULL; nd->path.dentry = NULL; out: rcu_read_unlock(); - return -ECHILD; + return false; } /** @@ -792,7 +792,7 @@ static int complete_walk(struct nameidata *nd) */ if (!(nd->flags & (LOOKUP_ROOT | LOOKUP_IS_SCOPED))) nd->root.mnt = NULL; - if (unlikely(unlazy_walk(nd))) + if (!try_to_unlazy(nd)) return -ECHILD; } @@ -1466,7 +1466,7 @@ static struct dentry *lookup_fast(struct nameidata *nd, unsigned seq; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (unlikely(!dentry)) { - if (unlazy_walk(nd)) + if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); return NULL; } @@ -1567,10 +1567,8 @@ static inline int may_lookup(struct nameidata *nd) { if (nd->flags & LOOKUP_RCU) { int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); - if (err != -ECHILD) + if (err != -ECHILD || !try_to_unlazy(nd)) return err; - if (unlazy_walk(nd)) - return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); } @@ -1592,7 +1590,7 @@ static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq) // unlazy even if we fail to grab the link - cleanup needs it bool grabbed_link = legitimize_path(nd, link, seq); - if (unlazy_walk(nd) != 0 || !grabbed_link) + if (!try_to_unlazy(nd) != 0 || !grabbed_link) return -ECHILD; if (nd_alloc_stack(nd)) @@ -1634,7 +1632,7 @@ static const char *pick_link(struct nameidata *nd, struct path *link, touch_atime(&last->link); cond_resched(); } else if (atime_needs_update(&last->link, inode)) { - if (unlikely(unlazy_walk(nd))) + if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); touch_atime(&last->link); } @@ -1651,11 +1649,8 @@ static const char *pick_link(struct nameidata *nd, struct path *link, get = inode->i_op->get_link; if (nd->flags & LOOKUP_RCU) { res = get(NULL, inode, &last->done); - if (res == ERR_PTR(-ECHILD)) { - if (unlikely(unlazy_walk(nd))) - return ERR_PTR(-ECHILD); + if (res == ERR_PTR(-ECHILD) && try_to_unlazy(nd)) res = get(link->dentry, inode, &last->done); - } } else { res = get(link->dentry, inode, &last->done); } @@ -2195,7 +2190,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) } if (unlikely(!d_can_lookup(nd->path.dentry))) { if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd)) + if (!try_to_unlazy(nd)) return -ECHILD; } return -ENOTDIR; @@ -3129,7 +3124,6 @@ static const char *open_last_lookups(struct nameidata *nd, struct inode *inode; struct dentry *dentry; const char *res; - int error; nd->flags |= op->intent; @@ -3153,9 +3147,8 @@ static const char *open_last_lookups(struct nameidata *nd, } else { /* create side of things */ if (nd->flags & LOOKUP_RCU) { - error = unlazy_walk(nd); - if (unlikely(error)) - return ERR_PTR(error); + if (!try_to_unlazy(nd)) + return ERR_PTR(-ECHILD); } audit_inode(nd->name, dir, AUDIT_INODE_PARENT); /* trailing slashes? */ @@ -3164,9 +3157,7 @@ static const char *open_last_lookups(struct nameidata *nd, } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { - error = mnt_want_write(nd->path.mnt); - if (!error) - got_write = true; + got_write = !mnt_want_write(nd->path.mnt); /* * do _not_ fail yet - we might not need that or fail with * a different error; let lookup_open() decide; we'll be diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index e2a488d403a6..14a72224b657 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -127,7 +127,7 @@ config PNFS_BLOCK config PNFS_FLEXFILE_LAYOUT tristate depends on NFS_V4_1 && NFS_V3 - default m + default NFS_V4 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN string "NFSv4.1 Implementation ID Domain" diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ef827ae193d2..4db3018776f6 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1401,6 +1401,15 @@ int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags) goto out; } +static void nfs_mark_dir_for_revalidate(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + spin_lock(&inode->i_lock); + nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE; + spin_unlock(&inode->i_lock); +} + /* * We judge how long we want to trust negative * dentries by looking at the parent inode mtime. @@ -1435,19 +1444,14 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry, __func__, dentry); return 1; case 0: - nfs_mark_for_revalidate(dir); - if (inode && S_ISDIR(inode->i_mode)) { - /* Purge readdir caches. */ - nfs_zap_caches(inode); - /* - * We can't d_drop the root of a disconnected tree: - * its d_hash is on the s_anon list and d_drop() would hide - * it from shrink_dcache_for_unmount(), leading to busy - * inodes on unmount and further oopses. - */ - if (IS_ROOT(dentry)) - return 1; - } + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (inode && IS_ROOT(dentry)) + return 1; dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n", __func__, dentry); return 0; @@ -1525,6 +1529,13 @@ nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry, nfs_free_fattr(fattr); nfs_free_fhandle(fhandle); nfs4_label_free(label); + + /* + * If the lookup failed despite the dentry change attribute being + * a match, then we should revalidate the directory cache. + */ + if (!ret && nfs_verify_change_attribute(dir, dentry->d_time)) + nfs_mark_dir_for_revalidate(dir); return nfs_lookup_revalidate_done(dir, dentry, inode, ret); } @@ -1567,7 +1578,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, error = nfs_lookup_verify_inode(inode, flags); if (error) { if (error == -ESTALE) - nfs_zap_caches(dir); + nfs_mark_dir_for_revalidate(dir); goto out_bad; } nfs_advise_use_readdirplus(dir); @@ -2064,7 +2075,6 @@ nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle, dput(parent); return d; out_error: - nfs_mark_for_revalidate(dir); d = ERR_PTR(error); goto out; } diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index ca10072644ff..ed1c83738c30 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -36,6 +36,7 @@ #define NFS3_pagepad_sz (1) /* Page padding */ #define NFS3_fhandle_sz (1+16) #define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_post_op_fh_sz (1+NFS3_fh_sz) #define NFS3_sattr_sz (15) #define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) #define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) @@ -73,7 +74,7 @@ #define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz) #define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) -#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_createres_sz (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 2f4679a62712..95d3b8540f8e 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5438,15 +5438,16 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode, if (cache_validity & NFS_INO_INVALID_ATIME) bitmask[1] |= FATTR4_WORD1_TIME_ACCESS; - if (cache_validity & NFS_INO_INVALID_ACCESS) - bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | - FATTR4_WORD1_OWNER_GROUP; - if (cache_validity & NFS_INO_INVALID_ACL) - bitmask[0] |= FATTR4_WORD0_ACL; - if (cache_validity & NFS_INO_INVALID_LABEL) + if (cache_validity & NFS_INO_INVALID_OTHER) + bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | + FATTR4_WORD1_OWNER_GROUP | + FATTR4_WORD1_NUMLINKS; + if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL) bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL; - if (cache_validity & NFS_INO_INVALID_CTIME) + if (cache_validity & NFS_INO_INVALID_CHANGE) bitmask[0] |= FATTR4_WORD0_CHANGE; + if (cache_validity & NFS_INO_INVALID_CTIME) + bitmask[1] |= FATTR4_WORD1_TIME_METADATA; if (cache_validity & NFS_INO_INVALID_MTIME) bitmask[1] |= FATTR4_WORD1_TIME_MODIFY; if (cache_validity & NFS_INO_INVALID_SIZE) @@ -5895,6 +5896,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE); int ret, i; + /* You can't remove system.nfs4_acl: */ + if (buflen == 0) + return -EINVAL; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; if (npages > ARRAY_SIZE(pages)) @@ -5971,7 +5975,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) return -ENOENT; - return 0; + return label.len; } static int nfs4_get_security_label(struct inode *inode, void *buf, diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 53fcbf79bdca..7629248fdd53 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -898,6 +898,8 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags, continue; if (!nfsd_match_cred(nf->nf_cred, current_cred())) continue; + if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) + continue; if (nfsd_file_get(nf) != NULL) return nf; } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 8d6d2678abad..3581ce737e85 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1304,7 +1304,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src, struct nfsd_file *dst) { nfs42_ssc_close(src->nf_file); - /* 'src' is freed by nfsd4_do_async_copy */ + fput(src->nf_file); nfsd_file_put(dst); mntput(ss_mnt); } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1d2cd6a88f61..a501bb9a2fac 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4945,31 +4945,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, return fl; } -static int nfsd4_check_conflicting_opens(struct nfs4_client *clp, - struct nfs4_file *fp) -{ - struct nfs4_clnt_odstate *co; - struct file *f = fp->fi_deleg_file->nf_file; - struct inode *ino = locks_inode(f); - int writes = atomic_read(&ino->i_writecount); - - if (fp->fi_fds[O_WRONLY]) - writes--; - if (fp->fi_fds[O_RDWR]) - writes--; - if (writes > 0) - return -EAGAIN; - spin_lock(&fp->fi_lock); - list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { - if (co->co_client != clp) { - spin_unlock(&fp->fi_lock); - return -EAGAIN; - } - } - spin_unlock(&fp->fi_lock); - return 0; -} - static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) @@ -4989,12 +4964,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, nf = find_readable_file(fp); if (!nf) { - /* - * We probably could attempt another open and get a read - * delegation, but for now, don't bother until the - * client actually sends us one. - */ - return ERR_PTR(-EAGAIN); + /* We should always have a readable file here */ + WARN_ON_ONCE(1); + return ERR_PTR(-EBADF); } spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5024,19 +4996,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (!fl) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) { - locks_free_lock(fl); - goto out_clnt_odstate; - } status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL); if (fl) locks_free_lock(fl); if (status) goto out_clnt_odstate; - status = nfsd4_check_conflicting_opens(clp, fp); - if (status) - goto out_clnt_odstate; spin_lock(&state_lock); spin_lock(&fp->fi_lock); @@ -5118,6 +5082,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, goto out_no_deleg; if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED)) goto out_no_deleg; + /* + * Also, if the file was opened for write or + * create, there's a good chance the client's + * about to write to it, resulting in an + * immediate recall (since we don't support + * write delegations): + */ + if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + goto out_no_deleg; + if (open->op_create == NFS4_OPEN_CREATE) + goto out_no_deleg; break; default: goto out_no_deleg; @@ -5397,7 +5372,7 @@ nfs4_laundromat(struct nfsd_net *nn) idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) { cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid); if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID && - cps->cpntf_time > cutoff) + cps->cpntf_time < cutoff) _free_cpntf_state_locked(nn, cps); } spin_unlock(&nn->s2s_cp_lock); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f6d5d783f4a4..0759e589ab52 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1522,12 +1522,9 @@ static int __init init_nfsd(void) int retval; printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n"); - retval = register_pernet_subsys(&nfsd_net_ops); - if (retval < 0) - return retval; retval = register_cld_notifier(); if (retval) - goto out_unregister_pernet; + return retval; retval = nfsd4_init_slabs(); if (retval) goto out_unregister_notifier; @@ -1544,9 +1541,14 @@ static int __init init_nfsd(void) goto out_free_lockd; retval = register_filesystem(&nfsd_fs_type); if (retval) + goto out_free_exports; + retval = register_pernet_subsys(&nfsd_net_ops); + if (retval < 0) goto out_free_all; return 0; out_free_all: + unregister_pernet_subsys(&nfsd_net_ops); +out_free_exports: remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); out_free_lockd: @@ -1559,13 +1561,12 @@ static int __init init_nfsd(void) nfsd4_free_slabs(); out_unregister_notifier: unregister_cld_notifier(); -out_unregister_pernet: - unregister_pernet_subsys(&nfsd_net_ops); return retval; } static void __exit exit_nfsd(void) { + unregister_pernet_subsys(&nfsd_net_ops); nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); @@ -1575,7 +1576,6 @@ static void __exit exit_nfsd(void) nfsd4_exit_pnfs(); unregister_filesystem(&nfsd_fs_type); unregister_cld_notifier(); - unregister_pernet_subsys(&nfsd_net_ops); } MODULE_AUTHOR("Olaf Kirch "); diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index f7e4cbc26eaf..be4ff9386ec0 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c @@ -629,6 +629,12 @@ static int ntfs_read_locked_inode(struct inode *vi) } a = ctx->attr; /* Get the standard information attribute value. */ + if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset) + + le32_to_cpu(a->data.resident.value_length) > + (u8 *)ctx->mrec + vol->mft_record_size) { + ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode."); + goto unm_err_out; + } si = (STANDARD_INFORMATION*)((u8*)a + le16_to_cpu(a->data.resident.value_offset)); diff --git a/fs/ntfs3/Kconfig b/fs/ntfs3/Kconfig new file mode 100644 index 000000000000..6e4cbc48ab8e --- /dev/null +++ b/fs/ntfs3/Kconfig @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only +config NTFS3_FS + tristate "NTFS Read-Write file system support" + select NLS + help + Windows OS native file system (NTFS) support up to NTFS version 3.1. + + Y or M enables the NTFS3 driver with full features enabled (read, + write, journal replaying, sparse/compressed files support). + File system type to use on mount is "ntfs3". Module name (M option) + is also "ntfs3". + + Documentation: + +config NTFS3_64BIT_CLUSTER + bool "64 bits per NTFS clusters" + depends on NTFS3_FS && 64BIT + help + Windows implementation of ntfs.sys uses 32 bits per clusters. + If activated 64 bits per clusters you will be able to use 4k cluster + for 16T+ volumes. Windows will not be able to mount such volumes. + + It is recommended to say N here. + +config NTFS3_LZX_XPRESS + bool "activate support of external compressions lzx/xpress" + depends on NTFS3_FS + help + In Windows 10 one can use command "compact" to compress any files. + 4 possible variants of compression are: xpress4k, xpress8k, xpress16k and lzx. + If activated you will be able to read such files correctly. + + It is recommended to say Y here. + +config NTFS3_FS_POSIX_ACL + bool "NTFS POSIX Access Control Lists" + depends on NTFS3_FS + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support additional access rights + for users and groups beyond the standard owner/group/world scheme, + and this option selects support for ACLs specifically for ntfs + filesystems. + NOTE: this is linux only feature. Windows will ignore these ACLs. + + If you don't know what Access Control Lists are, say N. diff --git a/fs/ntfs3/Makefile b/fs/ntfs3/Makefile new file mode 100644 index 000000000000..b06a06cc006f --- /dev/null +++ b/fs/ntfs3/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the ntfs3 filesystem support. +# + +# to check robot warnings +ccflags-y += -Wunused-but-set-variable -Wold-style-declaration -Wint-to-pointer-cast + +obj-$(CONFIG_NTFS3_FS) += ntfs3.o + +ntfs3-y := attrib.o \ + attrlist.o \ + bitfunc.o \ + bitmap.o \ + dir.o \ + fsntfs.o \ + frecord.o \ + file.o \ + fslog.o \ + inode.o \ + index.o \ + lznt.o \ + namei.o \ + record.o \ + run.o \ + super.o \ + upcase.o \ + xattr.o + +ntfs3-$(CONFIG_NTFS3_LZX_XPRESS) += $(addprefix lib/,\ + decompress_common.o \ + lzx_decompress.o \ + xpress_decompress.o \ + ) \ No newline at end of file diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c new file mode 100644 index 000000000000..188c8b2806ae --- /dev/null +++ b/fs/ntfs3/attrib.c @@ -0,0 +1,2083 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * TODO: merge attr_set_size/attr_data_get_block/attr_allocate_frame? + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * You can set external NTFS_MIN_LOG2_OF_CLUMP/NTFS_MAX_LOG2_OF_CLUMP to manage + * preallocate algorithm + */ +#ifndef NTFS_MIN_LOG2_OF_CLUMP +#define NTFS_MIN_LOG2_OF_CLUMP 16 +#endif + +#ifndef NTFS_MAX_LOG2_OF_CLUMP +#define NTFS_MAX_LOG2_OF_CLUMP 26 +#endif + +// 16M +#define NTFS_CLUMP_MIN (1 << (NTFS_MIN_LOG2_OF_CLUMP + 8)) +// 16G +#define NTFS_CLUMP_MAX (1ull << (NTFS_MAX_LOG2_OF_CLUMP + 8)) + +/* + * get_pre_allocated + * + */ +static inline u64 get_pre_allocated(u64 size) +{ + u32 clump; + u8 align_shift; + u64 ret; + + if (size <= NTFS_CLUMP_MIN) { + clump = 1 << NTFS_MIN_LOG2_OF_CLUMP; + align_shift = NTFS_MIN_LOG2_OF_CLUMP; + } else if (size >= NTFS_CLUMP_MAX) { + clump = 1 << NTFS_MAX_LOG2_OF_CLUMP; + align_shift = NTFS_MAX_LOG2_OF_CLUMP; + } else { + align_shift = NTFS_MIN_LOG2_OF_CLUMP - 1 + + __ffs(size >> (8 + NTFS_MIN_LOG2_OF_CLUMP)); + clump = 1u << align_shift; + } + + ret = (((size + clump - 1) >> align_shift)) << align_shift; + + return ret; +} + +/* + * attr_must_be_resident + * + * returns true if attribute must be resident + */ +static inline bool attr_must_be_resident(struct ntfs_sb_info *sbi, + enum ATTR_TYPE type) +{ + const struct ATTR_DEF_ENTRY *de; + + switch (type) { + case ATTR_STD: + case ATTR_NAME: + case ATTR_ID: + case ATTR_LABEL: + case ATTR_VOL_INFO: + case ATTR_ROOT: + case ATTR_EA_INFO: + return true; + default: + de = ntfs_query_def(sbi, type); + if (de && (de->flags & NTFS_ATTR_MUST_BE_RESIDENT)) + return true; + return false; + } +} + +/* + * attr_load_runs + * + * load all runs stored in 'attr' + */ +int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, + struct runs_tree *run, const CLST *vcn) +{ + int err; + CLST svcn = le64_to_cpu(attr->nres.svcn); + CLST evcn = le64_to_cpu(attr->nres.evcn); + u32 asize; + u16 run_off; + + if (svcn >= evcn + 1 || run_is_mapped_full(run, svcn, evcn)) + return 0; + + if (vcn && (evcn < *vcn || *vcn < svcn)) + return -EINVAL; + + asize = le32_to_cpu(attr->size); + run_off = le16_to_cpu(attr->nres.run_off); + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, + vcn ? *vcn : svcn, Add2Ptr(attr, run_off), + asize - run_off); + if (err < 0) + return err; + + return 0; +} + +/* + * int run_deallocate_ex + * + * Deallocate clusters + */ +static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run, + CLST vcn, CLST len, CLST *done, bool trim) +{ + int err = 0; + CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0; + size_t idx; + + if (!len) + goto out; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { +failed: + run_truncate(run, vcn0); + err = -EINVAL; + goto out; + } + + for (;;) { + if (clen > len) + clen = len; + + if (!clen) { + err = -EINVAL; + goto out; + } + + if (lcn != SPARSE_LCN) { + mark_as_free_ex(sbi, lcn, clen, trim); + dn += clen; + } + + len -= clen; + if (!len) + break; + + vcn_next = vcn + clen; + if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || + vcn != vcn_next) { + // save memory - don't load entire run + goto failed; + } + } + +out: + if (done) + *done += dn; + + return err; +} + +/* + * attr_allocate_clusters + * + * find free space, mark it as used and store in 'run' + */ +int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, + CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, + enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, + CLST *new_lcn) +{ + int err; + CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + size_t cnt = run->count; + + for (;;) { + err = ntfs_look_for_free_space(sbi, lcn, len + pre, &lcn, &flen, + opt); + + if (err == -ENOSPC && pre) { + pre = 0; + if (*pre_alloc) + *pre_alloc = 0; + continue; + } + + if (err) + goto out; + + if (new_lcn && vcn == vcn0) + *new_lcn = lcn; + + /* Add new fragment into run storage */ + if (!run_add_entry(run, vcn, lcn, flen, opt == ALLOCATE_MFT)) { + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + wnd_set_free(wnd, lcn, flen); + up_write(&wnd->rw_lock); + err = -ENOMEM; + goto out; + } + + vcn += flen; + + if (flen >= len || opt == ALLOCATE_MFT || + (fr && run->count - cnt >= fr)) { + *alen = vcn - vcn0; + return 0; + } + + len -= flen; + } + +out: + /* undo */ + run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false); + run_truncate(run, vcn0); + + return err; +} + +/* + * if page is not NULL - it is already contains resident data + * and locked (called from ni_write_frame) + */ +int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, + u64 new_size, struct runs_tree *run, + struct ATTRIB **ins_attr, struct page *page) +{ + struct ntfs_sb_info *sbi; + struct ATTRIB *attr_s; + struct MFT_REC *rec; + u32 used, asize, rsize, aoff, align; + bool is_data; + CLST len, alen; + char *next; + int err; + + if (attr->non_res) { + *ins_attr = attr; + return 0; + } + + sbi = mi->sbi; + rec = mi->mrec; + attr_s = NULL; + used = le32_to_cpu(rec->used); + asize = le32_to_cpu(attr->size); + next = Add2Ptr(attr, asize); + aoff = PtrOffset(rec, attr); + rsize = le32_to_cpu(attr->res.data_size); + is_data = attr->type == ATTR_DATA && !attr->name_len; + + align = sbi->cluster_size; + if (is_attr_compressed(attr)) + align <<= COMPRESSION_UNIT; + len = (rsize + align - 1) >> sbi->cluster_bits; + + run_init(run); + + /* make a copy of original attribute */ + attr_s = ntfs_memdup(attr, asize); + if (!attr_s) { + err = -ENOMEM; + goto out; + } + + if (!len) { + /* empty resident -> empty nonresident */ + alen = 0; + } else { + const char *data = resident_data(attr); + + err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL, + ALLOCATE_DEF, &alen, 0, NULL); + if (err) + goto out1; + + if (!rsize) { + /* empty resident -> non empty nonresident */ + } else if (!is_data) { + err = ntfs_sb_write_run(sbi, run, 0, data, rsize); + if (err) + goto out2; + } else if (!page) { + char *kaddr; + + page = grab_cache_page(ni->vfs_inode.i_mapping, 0); + if (!page) { + err = -ENOMEM; + goto out2; + } + kaddr = kmap_atomic(page); + memcpy(kaddr, data, rsize); + memset(kaddr + rsize, 0, PAGE_SIZE - rsize); + kunmap_atomic(kaddr); + flush_dcache_page(page); + SetPageUptodate(page); + set_page_dirty(page); + unlock_page(page); + put_page(page); + } + } + + /* remove original attribute */ + used -= asize; + memmove(attr, Add2Ptr(attr, asize), used - aoff); + rec->used = cpu_to_le32(used); + mi->dirty = true; + if (le) + al_remove_le(ni, le); + + err = ni_insert_nonresident(ni, attr_s->type, attr_name(attr_s), + attr_s->name_len, run, 0, alen, + attr_s->flags, &attr, NULL); + if (err) + goto out3; + + ntfs_free(attr_s); + attr->nres.data_size = cpu_to_le64(rsize); + attr->nres.valid_size = attr->nres.data_size; + + *ins_attr = attr; + + if (is_data) + ni->ni_flags &= ~NI_FLAG_RESIDENT; + + /* Resident attribute becomes non resident */ + return 0; + +out3: + attr = Add2Ptr(rec, aoff); + memmove(next, attr, used - aoff); + memcpy(attr, attr_s, asize); + rec->used = cpu_to_le32(used + asize); + mi->dirty = true; +out2: + /* undo: do not trim new allocated clusters */ + run_deallocate(sbi, run, false); + run_close(run); +out1: + ntfs_free(attr_s); + /*reinsert le*/ +out: + return err; +} + +/* + * attr_set_size_res + * + * helper for attr_set_size + */ +static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, + u64 new_size, struct runs_tree *run, + struct ATTRIB **ins_attr) +{ + struct ntfs_sb_info *sbi = mi->sbi; + struct MFT_REC *rec = mi->mrec; + u32 used = le32_to_cpu(rec->used); + u32 asize = le32_to_cpu(attr->size); + u32 aoff = PtrOffset(rec, attr); + u32 rsize = le32_to_cpu(attr->res.data_size); + u32 tail = used - aoff - asize; + char *next = Add2Ptr(attr, asize); + s64 dsize = QuadAlign(new_size) - QuadAlign(rsize); + + if (dsize < 0) { + memmove(next + dsize, next, tail); + } else if (dsize > 0) { + if (used + dsize > sbi->max_bytes_per_attr) + return attr_make_nonresident(ni, attr, le, mi, new_size, + run, ins_attr, NULL); + + memmove(next + dsize, next, tail); + memset(next, 0, dsize); + } + + if (new_size > rsize) + memset(Add2Ptr(resident_data(attr), rsize), 0, + new_size - rsize); + + rec->used = cpu_to_le32(used + dsize); + attr->size = cpu_to_le32(asize + dsize); + attr->res.data_size = cpu_to_le32(new_size); + mi->dirty = true; + *ins_attr = attr; + + return 0; +} + +/* + * attr_set_size + * + * change the size of attribute + * Extend: + * - sparse/compressed: no allocated clusters + * - normal: append allocated and preallocated new clusters + * Shrink: + * - no deallocate if keep_prealloc is set + */ +int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + bool is_mft = + ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA && !name_len; + u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn; + CLST next_svcn, pre_alloc = -1, done = 0; + bool is_ext; + u32 align; + struct MFT_REC *rec; + +again: + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, NULL, + &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + + if (!attr_b->non_res) { + err = attr_set_size_res(ni, attr_b, le_b, mi_b, new_size, run, + &attr_b); + if (err || !attr_b->non_res) + goto out; + + /* layout of records may be changed, so do a full search */ + goto again; + } + + is_ext = is_attr_ext(attr_b); + +again_1: + align = sbi->cluster_size; + + if (is_ext) { + align <<= attr_b->nres.c_unit; + if (is_attr_sparsed(attr_b)) + keep_prealloc = false; + } + + old_valid = le64_to_cpu(attr_b->nres.valid_size); + old_size = le64_to_cpu(attr_b->nres.data_size); + old_alloc = le64_to_cpu(attr_b->nres.alloc_size); + old_alen = old_alloc >> cluster_bits; + + new_alloc = (new_size + align - 1) & ~(u64)(align - 1); + new_alen = new_alloc >> cluster_bits; + + if (keep_prealloc && is_ext) + keep_prealloc = false; + + if (keep_prealloc && new_size < old_size) { + attr_b->nres.data_size = cpu_to_le64(new_size); + mi_b->dirty = true; + goto ok; + } + + vcn = old_alen - 1; + + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn = le64_to_cpu(attr_b->nres.evcn); + + if (svcn <= vcn && vcn <= evcn) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto out; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, type, name, name_len, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + +next_le_1: + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + } + +next_le: + rec = mi->mrec; + + err = attr_load_runs(attr, ni, run, NULL); + if (err) + goto out; + + if (new_size > old_size) { + CLST to_allocate; + size_t free; + + if (new_alloc <= old_alloc) { + attr_b->nres.data_size = cpu_to_le64(new_size); + mi_b->dirty = true; + goto ok; + } + + to_allocate = new_alen - old_alen; +add_alloc_in_same_attr_seg: + lcn = 0; + if (is_mft) { + /* mft allocates clusters from mftzone */ + pre_alloc = 0; + } else if (is_ext) { + /* no preallocate for sparse/compress */ + pre_alloc = 0; + } else if (pre_alloc == -1) { + pre_alloc = 0; + if (type == ATTR_DATA && !name_len && + sbi->options.prealloc) { + CLST new_alen2 = bytes_to_cluster( + sbi, get_pre_allocated(new_size)); + pre_alloc = new_alen2 - new_alen; + } + + /* Get the last lcn to allocate from */ + if (old_alen && + !run_lookup_entry(run, vcn, &lcn, NULL, NULL)) { + lcn = SPARSE_LCN; + } + + if (lcn == SPARSE_LCN) + lcn = 0; + else if (lcn) + lcn += 1; + + free = wnd_zeroes(&sbi->used.bitmap); + if (to_allocate > free) { + err = -ENOSPC; + goto out; + } + + if (pre_alloc && to_allocate + pre_alloc > free) + pre_alloc = 0; + } + + vcn = old_alen; + + if (is_ext) { + if (!run_add_entry(run, vcn, SPARSE_LCN, to_allocate, + false)) { + err = -ENOMEM; + goto out; + } + alen = to_allocate; + } else { + /* ~3 bytes per fragment */ + err = attr_allocate_clusters( + sbi, run, vcn, lcn, to_allocate, &pre_alloc, + is_mft ? ALLOCATE_MFT : 0, &alen, + is_mft ? 0 + : (sbi->record_size - + le32_to_cpu(rec->used) + 8) / + 3 + + 1, + NULL); + if (err) + goto out; + } + + done += alen; + vcn += alen; + if (to_allocate > alen) + to_allocate -= alen; + else + to_allocate = 0; + +pack_runs: + err = mi_pack_runs(mi, attr, run, vcn - svcn); + if (err) + goto out; + + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + new_alloc_tmp = (u64)next_svcn << cluster_bits; + attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp); + mi_b->dirty = true; + + if (next_svcn >= vcn && !to_allocate) { + /* Normal way. update attribute and exit */ + attr_b->nres.data_size = cpu_to_le64(new_size); + goto ok; + } + + /* at least two mft to avoid recursive loop*/ + if (is_mft && next_svcn == vcn && + ((u64)done << sbi->cluster_bits) >= 2 * sbi->record_size) { + new_size = new_alloc_tmp; + attr_b->nres.data_size = attr_b->nres.alloc_size; + goto ok; + } + + if (le32_to_cpu(rec->used) < sbi->record_size) { + old_alen = next_svcn; + evcn = old_alen - 1; + goto add_alloc_in_same_attr_seg; + } + + attr_b->nres.data_size = attr_b->nres.alloc_size; + if (new_alloc_tmp < old_valid) + attr_b->nres.valid_size = attr_b->nres.data_size; + + if (type == ATTR_LIST) { + err = ni_expand_list(ni); + if (err) + goto out; + if (next_svcn < vcn) + goto pack_runs; + + /* layout of records is changed */ + goto again; + } + + if (!ni->attr_list.size) { + err = ni_create_attr_list(ni); + if (err) + goto out; + /* layout of records is changed */ + } + + if (next_svcn >= vcn) { + /* this is mft data, repeat */ + goto again; + } + + /* insert new attribute segment */ + err = ni_insert_nonresident(ni, type, name, name_len, run, + next_svcn, vcn - next_svcn, + attr_b->flags, &attr, &mi); + if (err) + goto out; + + if (!is_mft) + run_truncate_head(run, evcn + 1); + + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + + le_b = NULL; + /* layout of records maybe changed */ + /* find base attribute to update*/ + attr_b = ni_find_attr(ni, NULL, &le_b, type, name, name_len, + NULL, &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + + attr_b->nres.alloc_size = cpu_to_le64((u64)vcn << cluster_bits); + attr_b->nres.data_size = attr_b->nres.alloc_size; + attr_b->nres.valid_size = attr_b->nres.alloc_size; + mi_b->dirty = true; + goto again_1; + } + + if (new_size != old_size || + (new_alloc != old_alloc && !keep_prealloc)) { + vcn = max(svcn, new_alen); + new_alloc_tmp = (u64)vcn << cluster_bits; + + alen = 0; + err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &alen, + true); + if (err) + goto out; + + run_truncate(run, vcn); + + if (vcn > svcn) { + err = mi_pack_runs(mi, attr, run, vcn - svcn); + if (err) + goto out; + } else if (le && le->vcn) { + u16 le_sz = le16_to_cpu(le->size); + + /* + * NOTE: list entries for one attribute are always + * the same size. We deal with last entry (vcn==0) + * and it is not first in entries array + * (list entry for std attribute always first) + * So it is safe to step back + */ + mi_remove_attr(mi, attr); + + if (!al_remove_le(ni, le)) { + err = -EINVAL; + goto out; + } + + le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz); + } else { + attr->nres.evcn = cpu_to_le64((u64)vcn - 1); + mi->dirty = true; + } + + attr_b->nres.alloc_size = cpu_to_le64(new_alloc_tmp); + + if (vcn == new_alen) { + attr_b->nres.data_size = cpu_to_le64(new_size); + if (new_size < old_valid) + attr_b->nres.valid_size = + attr_b->nres.data_size; + } else { + if (new_alloc_tmp <= + le64_to_cpu(attr_b->nres.data_size)) + attr_b->nres.data_size = + attr_b->nres.alloc_size; + if (new_alloc_tmp < + le64_to_cpu(attr_b->nres.valid_size)) + attr_b->nres.valid_size = + attr_b->nres.alloc_size; + } + + if (is_ext) + le64_sub_cpu(&attr_b->nres.total_size, + ((u64)alen << cluster_bits)); + + mi_b->dirty = true; + + if (new_alloc_tmp <= new_alloc) + goto ok; + + old_size = new_alloc_tmp; + vcn = svcn - 1; + + if (le == le_b) { + attr = attr_b; + mi = mi_b; + evcn = svcn - 1; + svcn = 0; + goto next_le; + } + + if (le->type != type || le->name_len != name_len || + memcmp(le_name(le), name, name_len * sizeof(short))) { + err = -EINVAL; + goto out; + } + + err = ni_load_mi(ni, le, &mi); + if (err) + goto out; + + attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + if (!attr) { + err = -EINVAL; + goto out; + } + goto next_le_1; + } + +ok: + if (new_valid) { + __le64 valid = cpu_to_le64(min(*new_valid, new_size)); + + if (attr_b->nres.valid_size != valid) { + attr_b->nres.valid_size = valid; + mi_b->dirty = true; + } + } + +out: + if (!err && attr_b && ret) + *ret = attr_b; + + /* update inode_set_bytes*/ + if (!err && ((type == ATTR_DATA && !name_len) || + (type == ATTR_ALLOC && name == I30_NAME))) { + bool dirty = false; + + if (ni->vfs_inode.i_size != new_size) { + ni->vfs_inode.i_size = new_size; + dirty = true; + } + + if (attr_b && attr_b->non_res) { + new_alloc = le64_to_cpu(attr_b->nres.alloc_size); + if (inode_get_bytes(&ni->vfs_inode) != new_alloc) { + inode_set_bytes(&ni->vfs_inode, new_alloc); + dirty = true; + } + } + + if (dirty) { + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + mark_inode_dirty(&ni->vfs_inode); + } + } + + return err; +} + +int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, + CLST *len, bool *new) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + struct ntfs_sb_info *sbi; + u8 cluster_bits; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end; + u64 total_size; + u32 clst_per_frame; + bool ok; + + if (new) + *new = false; + + down_read(&ni->file.run_lock); + ok = run_lookup_entry(run, vcn, lcn, len, NULL); + up_read(&ni->file.run_lock); + + if (ok && (*lcn != SPARSE_LCN || !new)) { + /* normal way */ + return 0; + } + + if (!clen) + clen = 1; + + if (ok && clen > *len) + clen = *len; + + sbi = ni->mi.sbi; + cluster_bits = sbi->cluster_bits; + + ni_lock(ni); + down_write(&ni->file.run_lock); + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + + if (!attr_b->non_res) { + *lcn = RESIDENT_LCN; + *len = 1; + goto out; + } + + asize = le64_to_cpu(attr_b->nres.alloc_size) >> sbi->cluster_bits; + if (vcn >= asize) { + err = -EINVAL; + goto out; + } + + clst_per_frame = 1u << attr_b->nres.c_unit; + to_alloc = (clen + clst_per_frame - 1) & ~(clst_per_frame - 1); + + if (vcn + to_alloc > asize) + to_alloc = asize - vcn; + + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + + attr = attr_b; + le = le_b; + mi = mi_b; + + if (le_b && (vcn < svcn || evcn1 <= vcn)) { + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + err = attr_load_runs(attr, ni, run, NULL); + if (err) + goto out; + + if (!ok) { + ok = run_lookup_entry(run, vcn, lcn, len, NULL); + if (ok && (*lcn != SPARSE_LCN || !new)) { + /* normal way */ + err = 0; + goto ok; + } + + if (!ok && !new) { + *len = 0; + err = 0; + goto ok; + } + + if (ok && clen > *len) { + clen = *len; + to_alloc = (clen + clst_per_frame - 1) & + ~(clst_per_frame - 1); + } + } + + if (!is_attr_ext(attr_b)) { + err = -EINVAL; + goto out; + } + + /* Get the last lcn to allocate from */ + hint = 0; + + if (vcn > evcn1) { + if (!run_add_entry(run, evcn1, SPARSE_LCN, vcn - evcn1, + false)) { + err = -ENOMEM; + goto out; + } + } else if (vcn && !run_lookup_entry(run, vcn - 1, &hint, NULL, NULL)) { + hint = -1; + } + + err = attr_allocate_clusters( + sbi, run, vcn, hint + 1, to_alloc, NULL, 0, len, + (sbi->record_size - le32_to_cpu(mi->mrec->used) + 8) / 3 + 1, + lcn); + if (err) + goto out; + *new = true; + + end = vcn + *len; + + total_size = le64_to_cpu(attr_b->nres.total_size) + + ((u64)*len << cluster_bits); + +repack: + err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn); + if (err) + goto out; + + attr_b->nres.total_size = cpu_to_le64(total_size); + inode_set_bytes(&ni->vfs_inode, total_size); + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + + mi_b->dirty = true; + mark_inode_dirty(&ni->vfs_inode); + + /* stored [vcn : next_svcn) from [vcn : end) */ + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + + if (end <= evcn1) { + if (next_svcn == evcn1) { + /* Normal way. update attribute and exit */ + goto ok; + } + /* add new segment [next_svcn : evcn1 - next_svcn )*/ + if (!ni->attr_list.size) { + err = ni_create_attr_list(ni); + if (err) + goto out; + /* layout of records is changed */ + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, + 0, NULL, &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + + attr = attr_b; + le = le_b; + mi = mi_b; + goto repack; + } + } + + svcn = evcn1; + + /* Estimate next attribute */ + attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi); + + if (attr) { + CLST alloc = bytes_to_cluster( + sbi, le64_to_cpu(attr_b->nres.alloc_size)); + CLST evcn = le64_to_cpu(attr->nres.evcn); + + if (end < next_svcn) + end = next_svcn; + while (end > evcn) { + /* remove segment [svcn : evcn)*/ + mi_remove_attr(mi, attr); + + if (!al_remove_le(ni, le)) { + err = -EINVAL; + goto out; + } + + if (evcn + 1 >= alloc) { + /* last attribute segment */ + evcn1 = evcn + 1; + goto ins_ext; + } + + if (ni_load_mi(ni, le, &mi)) { + attr = NULL; + goto out; + } + + attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, + &le->id); + if (!attr) { + err = -EINVAL; + goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + } + + if (end < svcn) + end = svcn; + + err = attr_load_runs(attr, ni, run, &end); + if (err) + goto out; + + evcn1 = evcn + 1; + attr->nres.svcn = cpu_to_le64(next_svcn); + err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn); + if (err) + goto out; + + le->vcn = cpu_to_le64(next_svcn); + ni->attr_list.dirty = true; + mi->dirty = true; + + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + } +ins_ext: + if (evcn1 > next_svcn) { + err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, + next_svcn, evcn1 - next_svcn, + attr_b->flags, &attr, &mi); + if (err) + goto out; + } +ok: + run_truncate_around(run, vcn); +out: + up_write(&ni->file.run_lock); + ni_unlock(ni); + + return err; +} + +int attr_data_read_resident(struct ntfs_inode *ni, struct page *page) +{ + u64 vbo; + struct ATTRIB *attr; + u32 data_size; + + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, NULL); + if (!attr) + return -EINVAL; + + if (attr->non_res) + return E_NTFS_NONRESIDENT; + + vbo = page->index << PAGE_SHIFT; + data_size = le32_to_cpu(attr->res.data_size); + if (vbo < data_size) { + const char *data = resident_data(attr); + char *kaddr = kmap_atomic(page); + u32 use = data_size - vbo; + + if (use > PAGE_SIZE) + use = PAGE_SIZE; + + memcpy(kaddr, data + vbo, use); + memset(kaddr + use, 0, PAGE_SIZE - use); + kunmap_atomic(kaddr); + flush_dcache_page(page); + SetPageUptodate(page); + } else if (!PageUptodate(page)) { + zero_user_segment(page, 0, PAGE_SIZE); + SetPageUptodate(page); + } + + return 0; +} + +int attr_data_write_resident(struct ntfs_inode *ni, struct page *page) +{ + u64 vbo; + struct mft_inode *mi; + struct ATTRIB *attr; + u32 data_size; + + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi); + if (!attr) + return -EINVAL; + + if (attr->non_res) { + /*return special error code to check this case*/ + return E_NTFS_NONRESIDENT; + } + + vbo = page->index << PAGE_SHIFT; + data_size = le32_to_cpu(attr->res.data_size); + if (vbo < data_size) { + char *data = resident_data(attr); + char *kaddr = kmap_atomic(page); + u32 use = data_size - vbo; + + if (use > PAGE_SIZE) + use = PAGE_SIZE; + memcpy(data + vbo, kaddr, use); + kunmap_atomic(kaddr); + mi->dirty = true; + } + ni->i_valid = data_size; + + return 0; +} + +/* + * attr_load_runs_vcn + * + * load runs with vcn + */ +int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + CLST vcn) +{ + struct ATTRIB *attr; + int err; + CLST svcn, evcn; + u16 ro; + + attr = ni_find_attr(ni, NULL, NULL, type, name, name_len, &vcn, NULL); + if (!attr) + return -ENOENT; + + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + + if (evcn < vcn || vcn < svcn) + return -EINVAL; + + ro = le16_to_cpu(attr->nres.run_off); + err = run_unpack_ex(run, ni->mi.sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, ro), le32_to_cpu(attr->size) - ro); + if (err < 0) + return err; + return 0; +} + +/* + * load runs for given range [from to) + */ +int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 from, u64 to) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + CLST vcn = from >> cluster_bits; + CLST vcn_last = (to - 1) >> cluster_bits; + CLST lcn, clen; + int err; + + for (vcn = from >> cluster_bits; vcn <= vcn_last; vcn += clen) { + if (!run_lookup_entry(run, vcn, &lcn, &clen, NULL)) { + err = attr_load_runs_vcn(ni, type, name, name_len, run, + vcn); + if (err) + return err; + clen = 0; /*next run_lookup_entry(vcn) must be success*/ + } + } + + return 0; +} + +#ifdef CONFIG_NTFS3_LZX_XPRESS +/* + * attr_wof_frame_info + * + * read header of xpress/lzx file to get info about frame + */ +int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, + struct runs_tree *run, u64 frame, u64 frames, + u8 frame_bits, u32 *ondisk_size, u64 *vbo_data) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + u64 vbo[2], off[2], wof_size; + u32 voff; + u8 bytes_per_off; + char *addr; + struct page *page; + int i, err; + __le32 *off32; + __le64 *off64; + + if (ni->vfs_inode.i_size < 0x100000000ull) { + /* file starts with array of 32 bit offsets */ + bytes_per_off = sizeof(__le32); + vbo[1] = frame << 2; + *vbo_data = frames << 2; + } else { + /* file starts with array of 64 bit offsets */ + bytes_per_off = sizeof(__le64); + vbo[1] = frame << 3; + *vbo_data = frames << 3; + } + + /* + * read 4/8 bytes at [vbo - 4(8)] == offset where compressed frame starts + * read 4/8 bytes at [vbo] == offset where compressed frame ends + */ + if (!attr->non_res) { + if (vbo[1] + bytes_per_off > le32_to_cpu(attr->res.data_size)) { + ntfs_inode_err(&ni->vfs_inode, "is corrupted"); + return -EINVAL; + } + addr = resident_data(attr); + + if (bytes_per_off == sizeof(__le32)) { + off32 = Add2Ptr(addr, vbo[1]); + off[0] = vbo[1] ? le32_to_cpu(off32[-1]) : 0; + off[1] = le32_to_cpu(off32[0]); + } else { + off64 = Add2Ptr(addr, vbo[1]); + off[0] = vbo[1] ? le64_to_cpu(off64[-1]) : 0; + off[1] = le64_to_cpu(off64[0]); + } + + *vbo_data += off[0]; + *ondisk_size = off[1] - off[0]; + return 0; + } + + wof_size = le64_to_cpu(attr->nres.data_size); + down_write(&ni->file.run_lock); + page = ni->file.offs_page; + if (!page) { + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + page->index = -1; + ni->file.offs_page = page; + } + lock_page(page); + addr = page_address(page); + + if (vbo[1]) { + voff = vbo[1] & (PAGE_SIZE - 1); + vbo[0] = vbo[1] - bytes_per_off; + i = 0; + } else { + voff = 0; + vbo[0] = 0; + off[0] = 0; + i = 1; + } + + do { + pgoff_t index = vbo[i] >> PAGE_SHIFT; + + if (index != page->index) { + u64 from = vbo[i] & ~(u64)(PAGE_SIZE - 1); + u64 to = min(from + PAGE_SIZE, wof_size); + + err = attr_load_runs_range(ni, ATTR_DATA, WOF_NAME, + ARRAY_SIZE(WOF_NAME), run, + from, to); + if (err) + goto out1; + + err = ntfs_bio_pages(sbi, run, &page, 1, from, + to - from, REQ_OP_READ); + if (err) { + page->index = -1; + goto out1; + } + page->index = index; + } + + if (i) { + if (bytes_per_off == sizeof(__le32)) { + off32 = Add2Ptr(addr, voff); + off[1] = le32_to_cpu(*off32); + } else { + off64 = Add2Ptr(addr, voff); + off[1] = le64_to_cpu(*off64); + } + } else if (!voff) { + if (bytes_per_off == sizeof(__le32)) { + off32 = Add2Ptr(addr, PAGE_SIZE - sizeof(u32)); + off[0] = le32_to_cpu(*off32); + } else { + off64 = Add2Ptr(addr, PAGE_SIZE - sizeof(u64)); + off[0] = le64_to_cpu(*off64); + } + } else { + /* two values in one page*/ + if (bytes_per_off == sizeof(__le32)) { + off32 = Add2Ptr(addr, voff); + off[0] = le32_to_cpu(off32[-1]); + off[1] = le32_to_cpu(off32[0]); + } else { + off64 = Add2Ptr(addr, voff); + off[0] = le64_to_cpu(off64[-1]); + off[1] = le64_to_cpu(off64[0]); + } + break; + } + } while (++i < 2); + + *vbo_data += off[0]; + *ondisk_size = off[1] - off[0]; + +out1: + unlock_page(page); +out: + up_write(&ni->file.run_lock); + return err; +} +#endif + +/* + * attr_is_frame_compressed + * + * This function is used to detect compressed frame + */ +int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, + CLST frame, CLST *clst_data) +{ + int err; + u32 clst_frame; + CLST clen, lcn, vcn, alen, slen, vcn_next; + size_t idx; + struct runs_tree *run; + + *clst_data = 0; + + if (!is_attr_compressed(attr)) + return 0; + + if (!attr->non_res) + return 0; + + clst_frame = 1u << attr->nres.c_unit; + vcn = frame * clst_frame; + run = &ni->file.run; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { + err = attr_load_runs_vcn(ni, attr->type, attr_name(attr), + attr->name_len, run, vcn); + if (err) + return err; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) + return -EINVAL; + } + + if (lcn == SPARSE_LCN) { + /* sparsed frame */ + return 0; + } + + if (clen >= clst_frame) { + /* + * The frame is not compressed 'cause + * it does not contain any sparse clusters + */ + *clst_data = clst_frame; + return 0; + } + + alen = bytes_to_cluster(ni->mi.sbi, le64_to_cpu(attr->nres.alloc_size)); + slen = 0; + *clst_data = clen; + + /* + * The frame is compressed if *clst_data + slen >= clst_frame + * Check next fragments + */ + while ((vcn += clen) < alen) { + vcn_next = vcn; + + if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || + vcn_next != vcn) { + err = attr_load_runs_vcn(ni, attr->type, + attr_name(attr), + attr->name_len, run, vcn_next); + if (err) + return err; + vcn = vcn_next; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) + return -EINVAL; + } + + if (lcn == SPARSE_LCN) { + slen += clen; + } else { + if (slen) { + /* + * data_clusters + sparse_clusters = + * not enough for frame + */ + return -EINVAL; + } + *clst_data += clen; + } + + if (*clst_data + slen >= clst_frame) { + if (!slen) { + /* + * There is no sparsed clusters in this frame + * So it is not compressed + */ + *clst_data = clst_frame; + } else { + /*frame is compressed*/ + } + break; + } + } + + return 0; +} + +/* + * attr_allocate_frame + * + * allocate/free clusters for 'frame' + * assumed: down_write(&ni->file.run_lock); + */ +int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, + u64 new_valid) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST svcn, evcn1, next_svcn, lcn, len; + CLST vcn, end, clst_data; + u64 total_size, valid_size, data_size; + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); + if (!attr_b) + return -ENOENT; + + if (!is_attr_ext(attr_b)) + return -EINVAL; + + vcn = frame << NTFS_LZNT_CUNIT; + total_size = le64_to_cpu(attr_b->nres.total_size); + + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + data_size = le64_to_cpu(attr_b->nres.data_size); + + if (svcn <= vcn && vcn < evcn1) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto out; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + err = attr_load_runs(attr, ni, run, NULL); + if (err) + goto out; + + err = attr_is_frame_compressed(ni, attr_b, frame, &clst_data); + if (err) + goto out; + + total_size -= (u64)clst_data << sbi->cluster_bits; + + len = bytes_to_cluster(sbi, compr_size); + + if (len == clst_data) + goto out; + + if (len < clst_data) { + err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len, + NULL, true); + if (err) + goto out; + + if (!run_add_entry(run, vcn + len, SPARSE_LCN, clst_data - len, + false)) { + err = -ENOMEM; + goto out; + } + end = vcn + clst_data; + /* run contains updated range [vcn + len : end) */ + } else { + CLST alen, hint = 0; + /* Get the last lcn to allocate from */ + if (vcn + clst_data && + !run_lookup_entry(run, vcn + clst_data - 1, &hint, NULL, + NULL)) { + hint = -1; + } + + err = attr_allocate_clusters(sbi, run, vcn + clst_data, + hint + 1, len - clst_data, NULL, 0, + &alen, 0, &lcn); + if (err) + goto out; + + end = vcn + len; + /* run contains updated range [vcn + clst_data : end) */ + } + + total_size += (u64)len << sbi->cluster_bits; + +repack: + err = mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn); + if (err) + goto out; + + attr_b->nres.total_size = cpu_to_le64(total_size); + inode_set_bytes(&ni->vfs_inode, total_size); + + mi_b->dirty = true; + mark_inode_dirty(&ni->vfs_inode); + + /* stored [vcn : next_svcn) from [vcn : end) */ + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + + if (end <= evcn1) { + if (next_svcn == evcn1) { + /* Normal way. update attribute and exit */ + goto ok; + } + /* add new segment [next_svcn : evcn1 - next_svcn )*/ + if (!ni->attr_list.size) { + err = ni_create_attr_list(ni); + if (err) + goto out; + /* layout of records is changed */ + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, + 0, NULL, &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + + attr = attr_b; + le = le_b; + mi = mi_b; + goto repack; + } + } + + svcn = evcn1; + + /* Estimate next attribute */ + attr = ni_find_attr(ni, attr, &le, ATTR_DATA, NULL, 0, &svcn, &mi); + + if (attr) { + CLST alloc = bytes_to_cluster( + sbi, le64_to_cpu(attr_b->nres.alloc_size)); + CLST evcn = le64_to_cpu(attr->nres.evcn); + + if (end < next_svcn) + end = next_svcn; + while (end > evcn) { + /* remove segment [svcn : evcn)*/ + mi_remove_attr(mi, attr); + + if (!al_remove_le(ni, le)) { + err = -EINVAL; + goto out; + } + + if (evcn + 1 >= alloc) { + /* last attribute segment */ + evcn1 = evcn + 1; + goto ins_ext; + } + + if (ni_load_mi(ni, le, &mi)) { + attr = NULL; + goto out; + } + + attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, + &le->id); + if (!attr) { + err = -EINVAL; + goto out; + } + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + } + + if (end < svcn) + end = svcn; + + err = attr_load_runs(attr, ni, run, &end); + if (err) + goto out; + + evcn1 = evcn + 1; + attr->nres.svcn = cpu_to_le64(next_svcn); + err = mi_pack_runs(mi, attr, run, evcn1 - next_svcn); + if (err) + goto out; + + le->vcn = cpu_to_le64(next_svcn); + ni->attr_list.dirty = true; + mi->dirty = true; + + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + } +ins_ext: + if (evcn1 > next_svcn) { + err = ni_insert_nonresident(ni, ATTR_DATA, NULL, 0, run, + next_svcn, evcn1 - next_svcn, + attr_b->flags, &attr, &mi); + if (err) + goto out; + } +ok: + run_truncate_around(run, vcn); +out: + if (new_valid > data_size) + new_valid = data_size; + + valid_size = le64_to_cpu(attr_b->nres.valid_size); + if (new_valid != valid_size) { + attr_b->nres.valid_size = cpu_to_le64(valid_size); + mi_b->dirty = true; + } + + return err; +} + +/* Collapse range in file */ +int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST svcn, evcn1, len, dealloc, alen; + CLST vcn, end; + u64 valid_size, data_size, alloc_size, total_size; + u32 mask; + __le16 a_flags; + + if (!bytes) + return 0; + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); + if (!attr_b) + return -ENOENT; + + if (!attr_b->non_res) { + /* Attribute is resident. Nothing to do? */ + return 0; + } + + data_size = le64_to_cpu(attr_b->nres.data_size); + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + a_flags = attr_b->flags; + + if (is_attr_ext(attr_b)) { + total_size = le64_to_cpu(attr_b->nres.total_size); + mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1; + } else { + total_size = alloc_size; + mask = sbi->cluster_mask; + } + + if (vbo & mask) + return -EINVAL; + + if (bytes & mask) + return -EINVAL; + + if (vbo > data_size) + return -EINVAL; + + down_write(&ni->file.run_lock); + + if (vbo + bytes >= data_size) { + u64 new_valid = min(ni->i_valid, vbo); + + /* Simple truncate file at 'vbo' */ + truncate_setsize(&ni->vfs_inode, vbo); + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo, + &new_valid, true, NULL); + + if (!err && new_valid < ni->i_valid) + ni->i_valid = new_valid; + + goto out; + } + + /* + * Enumerate all attribute segments and collapse + */ + alen = alloc_size >> sbi->cluster_bits; + vcn = vbo >> sbi->cluster_bits; + len = bytes >> sbi->cluster_bits; + end = vcn + len; + dealloc = 0; + + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + + if (svcn <= vcn && vcn < evcn1) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto out; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + for (;;) { + if (svcn >= end) { + /* shift vcn */ + attr->nres.svcn = cpu_to_le64(svcn - len); + attr->nres.evcn = cpu_to_le64(evcn1 - 1 - len); + if (le) { + le->vcn = attr->nres.svcn; + ni->attr_list.dirty = true; + } + mi->dirty = true; + } else if (svcn < vcn || end < evcn1) { + CLST vcn1, eat, next_svcn; + + /* collapse a part of this attribute segment */ + err = attr_load_runs(attr, ni, run, &svcn); + if (err) + goto out; + vcn1 = max(vcn, svcn); + eat = min(end, evcn1) - vcn1; + + err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, + true); + if (err) + goto out; + + if (!run_collapse_range(run, vcn1, eat)) { + err = -ENOMEM; + goto out; + } + + if (svcn >= vcn) { + /* shift vcn */ + attr->nres.svcn = cpu_to_le64(vcn); + if (le) { + le->vcn = attr->nres.svcn; + ni->attr_list.dirty = true; + } + } + + err = mi_pack_runs(mi, attr, run, evcn1 - svcn - eat); + if (err) + goto out; + + next_svcn = le64_to_cpu(attr->nres.evcn) + 1; + if (next_svcn + eat < evcn1) { + err = ni_insert_nonresident( + ni, ATTR_DATA, NULL, 0, run, next_svcn, + evcn1 - eat - next_svcn, a_flags, &attr, + &mi); + if (err) + goto out; + + /* layout of records maybe changed */ + attr_b = NULL; + le = al_find_ex(ni, NULL, ATTR_DATA, NULL, 0, + &next_svcn); + if (!le) { + err = -EINVAL; + goto out; + } + } + + /* free all allocated memory */ + run_truncate(run, 0); + } else { + u16 le_sz; + u16 roff = le16_to_cpu(attr->nres.run_off); + + /*run==1 means unpack and deallocate*/ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, + evcn1 - 1, svcn, Add2Ptr(attr, roff), + le32_to_cpu(attr->size) - roff); + + /* delete this attribute segment */ + mi_remove_attr(mi, attr); + if (!le) + break; + + le_sz = le16_to_cpu(le->size); + if (!al_remove_le(ni, le)) { + err = -EINVAL; + goto out; + } + + if (evcn1 >= alen) + break; + + if (!svcn) { + /* Load next record that contains this attribute */ + if (ni_load_mi(ni, le, &mi)) { + err = -EINVAL; + goto out; + } + + /* Look for required attribute */ + attr = mi_find_attr(mi, NULL, ATTR_DATA, NULL, + 0, &le->id); + if (!attr) { + err = -EINVAL; + goto out; + } + goto next_attr; + } + le = (struct ATTR_LIST_ENTRY *)((u8 *)le - le_sz); + } + + if (evcn1 >= alen) + break; + + attr = ni_enum_attr_ex(ni, attr, &le, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + +next_attr: + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + if (!attr_b) { + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, + &mi_b); + if (!attr_b) { + err = -ENOENT; + goto out; + } + } + + data_size -= bytes; + valid_size = ni->i_valid; + if (vbo + bytes <= valid_size) + valid_size -= bytes; + else if (vbo < valid_size) + valid_size = vbo; + + attr_b->nres.alloc_size = cpu_to_le64(alloc_size - bytes); + attr_b->nres.data_size = cpu_to_le64(data_size); + attr_b->nres.valid_size = cpu_to_le64(min(valid_size, data_size)); + total_size -= (u64)dealloc << sbi->cluster_bits; + if (is_attr_ext(attr_b)) + attr_b->nres.total_size = cpu_to_le64(total_size); + mi_b->dirty = true; + + /*update inode size*/ + ni->i_valid = valid_size; + ni->vfs_inode.i_size = data_size; + inode_set_bytes(&ni->vfs_inode, total_size); + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + mark_inode_dirty(&ni->vfs_inode); + +out: + up_write(&ni->file.run_lock); + if (err) + make_bad_inode(&ni->vfs_inode); + + return err; +} + +/* not for normal files */ +int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *attr = NULL, *attr_b; + struct ATTR_LIST_ENTRY *le, *le_b; + struct mft_inode *mi, *mi_b; + CLST svcn, evcn1, vcn, len, end, alen, dealloc; + u64 total_size, alloc_size; + + if (!bytes) + return 0; + + le_b = NULL; + attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b); + if (!attr_b) + return -ENOENT; + + if (!attr_b->non_res) { + u32 data_size = le32_to_cpu(attr->res.data_size); + u32 from, to; + + if (vbo > data_size) + return 0; + + from = vbo; + to = (vbo + bytes) < data_size ? (vbo + bytes) : data_size; + memset(Add2Ptr(resident_data(attr_b), from), 0, to - from); + return 0; + } + + /* TODO: add support for normal files too */ + if (!is_attr_ext(attr_b)) + return -EOPNOTSUPP; + + alloc_size = le64_to_cpu(attr_b->nres.alloc_size); + total_size = le64_to_cpu(attr_b->nres.total_size); + + if (vbo >= alloc_size) { + // NOTE: it is allowed + return 0; + } + + if (vbo + bytes > alloc_size) + bytes = alloc_size - vbo; + + down_write(&ni->file.run_lock); + /* + * Enumerate all attribute segments and punch hole where necessary + */ + alen = alloc_size >> sbi->cluster_bits; + vcn = vbo >> sbi->cluster_bits; + len = bytes >> sbi->cluster_bits; + end = vcn + len; + dealloc = 0; + + svcn = le64_to_cpu(attr_b->nres.svcn); + evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1; + + if (svcn <= vcn && vcn < evcn1) { + attr = attr_b; + le = le_b; + mi = mi_b; + } else if (!le_b) { + err = -EINVAL; + goto out; + } else { + le = le_b; + attr = ni_find_attr(ni, attr_b, &le, ATTR_DATA, NULL, 0, &vcn, + &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + while (svcn < end) { + CLST vcn1, zero, dealloc2; + + err = attr_load_runs(attr, ni, run, &svcn); + if (err) + goto out; + vcn1 = max(vcn, svcn); + zero = min(end, evcn1) - vcn1; + + dealloc2 = dealloc; + err = run_deallocate_ex(sbi, run, vcn1, zero, &dealloc, true); + if (err) + goto out; + + if (dealloc2 == dealloc) { + /* looks like the required range is already sparsed */ + } else { + if (!run_add_entry(run, vcn1, SPARSE_LCN, zero, + false)) { + err = -ENOMEM; + goto out; + } + + err = mi_pack_runs(mi, attr, run, evcn1 - svcn); + if (err) + goto out; + } + /* free all allocated memory */ + run_truncate(run, 0); + + if (evcn1 >= alen) + break; + + attr = ni_enum_attr_ex(ni, attr, &le, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + svcn = le64_to_cpu(attr->nres.svcn); + evcn1 = le64_to_cpu(attr->nres.evcn) + 1; + } + + total_size -= (u64)dealloc << sbi->cluster_bits; + attr_b->nres.total_size = cpu_to_le64(total_size); + mi_b->dirty = true; + + /*update inode size*/ + inode_set_bytes(&ni->vfs_inode, total_size); + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + mark_inode_dirty(&ni->vfs_inode); + +out: + up_write(&ni->file.run_lock); + if (err) + make_bad_inode(&ni->vfs_inode); + + return err; +} diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c new file mode 100644 index 000000000000..ea561361b576 --- /dev/null +++ b/fs/ntfs3/attrlist.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* Returns true if le is valid */ +static inline bool al_is_valid_le(const struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le) +{ + if (!le || !ni->attr_list.le || !ni->attr_list.size) + return false; + + return PtrOffset(ni->attr_list.le, le) + le16_to_cpu(le->size) <= + ni->attr_list.size; +} + +void al_destroy(struct ntfs_inode *ni) +{ + run_close(&ni->attr_list.run); + ntfs_free(ni->attr_list.le); + ni->attr_list.le = NULL; + ni->attr_list.size = 0; + ni->attr_list.dirty = false; +} + +/* + * ntfs_load_attr_list + * + * This method makes sure that the ATTRIB list, if present, + * has been properly set up. + */ +int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr) +{ + int err; + size_t lsize; + void *le = NULL; + + if (ni->attr_list.size) + return 0; + + if (!attr->non_res) { + lsize = le32_to_cpu(attr->res.data_size); + le = ntfs_malloc(al_aligned(lsize)); + if (!le) { + err = -ENOMEM; + goto out; + } + memcpy(le, resident_data(attr), lsize); + } else if (attr->nres.svcn) { + err = -EINVAL; + goto out; + } else { + u16 run_off = le16_to_cpu(attr->nres.run_off); + + lsize = le64_to_cpu(attr->nres.data_size); + + run_init(&ni->attr_list.run); + + err = run_unpack_ex(&ni->attr_list.run, ni->mi.sbi, ni->mi.rno, + 0, le64_to_cpu(attr->nres.evcn), 0, + Add2Ptr(attr, run_off), + le32_to_cpu(attr->size) - run_off); + if (err < 0) + goto out; + + le = ntfs_malloc(al_aligned(lsize)); + if (!le) { + err = -ENOMEM; + goto out; + } + + err = ntfs_read_run_nb(ni->mi.sbi, &ni->attr_list.run, 0, le, + lsize, NULL); + if (err) + goto out; + } + + ni->attr_list.size = lsize; + ni->attr_list.le = le; + + return 0; + +out: + ni->attr_list.le = le; + al_destroy(ni); + + return err; +} + +/* + * al_enumerate + * + * Returns the next list 'le' + * if 'le' is NULL then returns the first 'le' + */ +struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le) +{ + size_t off; + u16 sz; + + if (!le) { + le = ni->attr_list.le; + } else { + sz = le16_to_cpu(le->size); + if (sz < sizeof(struct ATTR_LIST_ENTRY)) { + /* Impossible 'cause we should not return such 'le' */ + return NULL; + } + le = Add2Ptr(le, sz); + } + + /* Check boundary */ + off = PtrOffset(ni->attr_list.le, le); + if (off + sizeof(struct ATTR_LIST_ENTRY) > ni->attr_list.size) { + // The regular end of list + return NULL; + } + + sz = le16_to_cpu(le->size); + + /* Check 'le' for errors */ + if (sz < sizeof(struct ATTR_LIST_ENTRY) || + off + sz > ni->attr_list.size || + sz < le->name_off + le->name_len * sizeof(short)) { + return NULL; + } + + return le; +} + +/* + * al_find_le + * + * finds the first 'le' in the list which matches type, name and vcn + * Returns NULL if not found + */ +struct ATTR_LIST_ENTRY *al_find_le(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le, + const struct ATTRIB *attr) +{ + CLST svcn = attr_svcn(attr); + + return al_find_ex(ni, le, attr->type, attr_name(attr), attr->name_len, + &svcn); +} + +/* + * al_find_ex + * + * finds the first 'le' in the list which matches type, name and vcn + * Returns NULL if not found + */ +struct ATTR_LIST_ENTRY *al_find_ex(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, const CLST *vcn) +{ + struct ATTR_LIST_ENTRY *ret = NULL; + u32 type_in = le32_to_cpu(type); + + while ((le = al_enumerate(ni, le))) { + u64 le_vcn; + int diff = le32_to_cpu(le->type) - type_in; + + /* List entries are sorted by type, name and vcn */ + if (diff < 0) + continue; + + if (diff > 0) + return ret; + + if (le->name_len != name_len) + continue; + + le_vcn = le64_to_cpu(le->vcn); + if (!le_vcn) { + /* + * compare entry names only for entry with vcn == 0 + */ + diff = ntfs_cmp_names(le_name(le), name_len, name, + name_len, ni->mi.sbi->upcase, + true); + if (diff < 0) + continue; + + if (diff > 0) + return ret; + } + + if (!vcn) + return le; + + if (*vcn == le_vcn) + return le; + + if (*vcn < le_vcn) + return ret; + + ret = le; + } + + return ret; +} + +/* + * al_find_le_to_insert + * + * finds the first list entry which matches type, name and vcn + */ +static struct ATTR_LIST_ENTRY *al_find_le_to_insert(struct ntfs_inode *ni, + enum ATTR_TYPE type, + const __le16 *name, + u8 name_len, CLST vcn) +{ + struct ATTR_LIST_ENTRY *le = NULL, *prev; + u32 type_in = le32_to_cpu(type); + + /* List entries are sorted by type, name, vcn */ + while ((le = al_enumerate(ni, prev = le))) { + int diff = le32_to_cpu(le->type) - type_in; + + if (diff < 0) + continue; + + if (diff > 0) + return le; + + if (!le->vcn) { + /* + * compare entry names only for entry with vcn == 0 + */ + diff = ntfs_cmp_names(le_name(le), le->name_len, name, + name_len, ni->mi.sbi->upcase, + true); + if (diff < 0) + continue; + + if (diff > 0) + return le; + } + + if (le64_to_cpu(le->vcn) >= vcn) + return le; + } + + return prev ? Add2Ptr(prev, le16_to_cpu(prev->size)) : ni->attr_list.le; +} + +/* + * al_add_le + * + * adds an "attribute list entry" to the list. + */ +int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, + u8 name_len, CLST svcn, __le16 id, const struct MFT_REF *ref, + struct ATTR_LIST_ENTRY **new_le) +{ + int err; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + size_t off; + u16 sz; + size_t asize, new_asize; + u64 new_size; + typeof(ni->attr_list) *al = &ni->attr_list; + + /* + * Compute the size of the new 'le' + */ + sz = le_size(name_len); + new_size = al->size + sz; + asize = al_aligned(al->size); + new_asize = al_aligned(new_size); + + /* Scan forward to the point at which the new 'le' should be inserted. */ + le = al_find_le_to_insert(ni, type, name, name_len, svcn); + off = PtrOffset(al->le, le); + + if (new_size > asize) { + void *ptr = ntfs_malloc(new_asize); + + if (!ptr) + return -ENOMEM; + + memcpy(ptr, al->le, off); + memcpy(Add2Ptr(ptr, off + sz), le, al->size - off); + le = Add2Ptr(ptr, off); + ntfs_free(al->le); + al->le = ptr; + } else { + memmove(Add2Ptr(le, sz), le, al->size - off); + } + + al->size = new_size; + + le->type = type; + le->size = cpu_to_le16(sz); + le->name_len = name_len; + le->name_off = offsetof(struct ATTR_LIST_ENTRY, name); + le->vcn = cpu_to_le64(svcn); + le->ref = *ref; + le->id = id; + memcpy(le->name, name, sizeof(short) * name_len); + + al->dirty = true; + + err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size, + &new_size, true, &attr); + if (err) + return err; + + if (attr && attr->non_res) { + err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le, + al->size); + if (err) + return err; + } + + al->dirty = false; + *new_le = le; + + return 0; +} + +/* + * al_remove_le + * + * removes 'le' from attribute list + */ +bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le) +{ + u16 size; + size_t off; + typeof(ni->attr_list) *al = &ni->attr_list; + + if (!al_is_valid_le(ni, le)) + return false; + + /* Save on stack the size of 'le' */ + size = le16_to_cpu(le->size); + off = PtrOffset(al->le, le); + + memmove(le, Add2Ptr(le, size), al->size - (off + size)); + + al->size -= size; + al->dirty = true; + + return true; +} + +/* + * al_delete_le + * + * deletes from the list the first 'le' which matches its parameters. + */ +bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, + const __le16 *name, size_t name_len, + const struct MFT_REF *ref) +{ + u16 size; + struct ATTR_LIST_ENTRY *le; + size_t off; + typeof(ni->attr_list) *al = &ni->attr_list; + + /* Scan forward to the first 'le' that matches the input */ + le = al_find_ex(ni, NULL, type, name, name_len, &vcn); + if (!le) + return false; + + off = PtrOffset(al->le, le); + +next: + if (off >= al->size) + return false; + if (le->type != type) + return false; + if (le->name_len != name_len) + return false; + if (name_len && ntfs_cmp_names(le_name(le), name_len, name, name_len, + ni->mi.sbi->upcase, true)) + return false; + if (le64_to_cpu(le->vcn) != vcn) + return false; + + /* + * The caller specified a segment reference, so we have to + * scan through the matching entries until we find that segment + * reference or we run of matching entries. + */ + if (ref && memcmp(ref, &le->ref, sizeof(*ref))) { + off += le16_to_cpu(le->size); + le = Add2Ptr(al->le, off); + goto next; + } + + /* Save on stack the size of 'le' */ + size = le16_to_cpu(le->size); + /* Delete 'le'. */ + memmove(le, Add2Ptr(le, size), al->size - (off + size)); + + al->size -= size; + al->dirty = true; + + return true; +} + +/* + * al_update + */ +int al_update(struct ntfs_inode *ni) +{ + int err; + struct ATTRIB *attr; + typeof(ni->attr_list) *al = &ni->attr_list; + + if (!al->dirty || !al->size) + return 0; + + /* + * attribute list increased on demand in al_add_le + * attribute list decreased here + */ + err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL, + false, &attr); + if (err) + goto out; + + if (!attr->non_res) { + memcpy(resident_data(attr), al->le, al->size); + } else { + err = ntfs_sb_write_run(ni->mi.sbi, &al->run, 0, al->le, + al->size); + if (err) + goto out; + + attr->nres.valid_size = attr->nres.data_size; + } + + ni->mi.dirty = true; + al->dirty = false; + +out: + return err; +} diff --git a/fs/ntfs3/bitfunc.c b/fs/ntfs3/bitfunc.c new file mode 100644 index 000000000000..2de5faef2721 --- /dev/null +++ b/fs/ntfs3/bitfunc.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +#define BITS_IN_SIZE_T (sizeof(size_t) * 8) + +/* + * fill_mask[i] - first i bits are '1' , i = 0,1,2,3,4,5,6,7,8 + * fill_mask[i] = 0xFF >> (8-i) + */ +static const u8 fill_mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0F, + 0x1F, 0x3F, 0x7F, 0xFF }; + +/* + * zero_mask[i] - first i bits are '0' , i = 0,1,2,3,4,5,6,7,8 + * zero_mask[i] = 0xFF << i + */ +static const u8 zero_mask[] = { 0xFF, 0xFE, 0xFC, 0xF8, 0xF0, + 0xE0, 0xC0, 0x80, 0x00 }; + +/* + * are_bits_clear + * + * Returns true if all bits [bit, bit+nbits) are zeros "0" + */ +bool are_bits_clear(const ulong *lmap, size_t bit, size_t nbits) +{ + size_t pos = bit & 7; + const u8 *map = (u8 *)lmap + (bit >> 3); + + if (pos) { + if (8 - pos >= nbits) + return !nbits || !(*map & fill_mask[pos + nbits] & + zero_mask[pos]); + + if (*map++ & zero_mask[pos]) + return false; + nbits -= 8 - pos; + } + + pos = ((size_t)map) & (sizeof(size_t) - 1); + if (pos) { + pos = sizeof(size_t) - pos; + if (nbits >= pos * 8) { + for (nbits -= pos * 8; pos; pos--, map++) { + if (*map) + return false; + } + } + } + + for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) { + if (*((size_t *)map)) + return false; + } + + for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) { + if (*map) + return false; + } + + pos = nbits & 7; + if (pos && (*map & fill_mask[pos])) + return false; + + // All bits are zero + return true; +} + +/* + * are_bits_set + * + * Returns true if all bits [bit, bit+nbits) are ones "1" + */ +bool are_bits_set(const ulong *lmap, size_t bit, size_t nbits) +{ + u8 mask; + size_t pos = bit & 7; + const u8 *map = (u8 *)lmap + (bit >> 3); + + if (pos) { + if (8 - pos >= nbits) { + mask = fill_mask[pos + nbits] & zero_mask[pos]; + return !nbits || (*map & mask) == mask; + } + + mask = zero_mask[pos]; + if ((*map++ & mask) != mask) + return false; + nbits -= 8 - pos; + } + + pos = ((size_t)map) & (sizeof(size_t) - 1); + if (pos) { + pos = sizeof(size_t) - pos; + if (nbits >= pos * 8) { + for (nbits -= pos * 8; pos; pos--, map++) { + if (*map != 0xFF) + return false; + } + } + } + + for (pos = nbits / BITS_IN_SIZE_T; pos; pos--, map += sizeof(size_t)) { + if (*((size_t *)map) != MINUS_ONE_T) + return false; + } + + for (pos = (nbits % BITS_IN_SIZE_T) >> 3; pos; pos--, map++) { + if (*map != 0xFF) + return false; + } + + pos = nbits & 7; + if (pos) { + u8 mask = fill_mask[pos]; + + if ((*map & mask) != mask) + return false; + } + + // All bits are ones + return true; +} diff --git a/fs/ntfs3/bitmap.c b/fs/ntfs3/bitmap.c new file mode 100644 index 000000000000..32aab0031221 --- /dev/null +++ b/fs/ntfs3/bitmap.c @@ -0,0 +1,1519 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * This code builds two trees of free clusters extents. + * Trees are sorted by start of extent and by length of extent. + * NTFS_MAX_WND_EXTENTS defines the maximum number of elements in trees. + * In extreme case code reads on-disk bitmap to find free clusters + * + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * Maximum number of extents in tree. + */ +#define NTFS_MAX_WND_EXTENTS (32u * 1024u) + +struct rb_node_key { + struct rb_node node; + size_t key; +}; + +/* + * Tree is sorted by start (key) + */ +struct e_node { + struct rb_node_key start; /* Tree sorted by start */ + struct rb_node_key count; /* Tree sorted by len*/ +}; + +static int wnd_rescan(struct wnd_bitmap *wnd); +static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw); +static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits); + +static struct kmem_cache *ntfs_enode_cachep; + +int __init ntfs3_init_bitmap(void) +{ + ntfs_enode_cachep = + kmem_cache_create("ntfs3_enode_cache", sizeof(struct e_node), 0, + SLAB_RECLAIM_ACCOUNT, NULL); + return ntfs_enode_cachep ? 0 : -ENOMEM; +} + +void ntfs3_exit_bitmap(void) +{ + kmem_cache_destroy(ntfs_enode_cachep); +} + +static inline u32 wnd_bits(const struct wnd_bitmap *wnd, size_t i) +{ + return i + 1 == wnd->nwnd ? wnd->bits_last : wnd->sb->s_blocksize * 8; +} + +/* + * b_pos + b_len - biggest fragment + * Scan range [wpos wbits) window 'buf' + * Returns -1 if not found + */ +static size_t wnd_scan(const ulong *buf, size_t wbit, u32 wpos, u32 wend, + size_t to_alloc, size_t *prev_tail, size_t *b_pos, + size_t *b_len) +{ + while (wpos < wend) { + size_t free_len; + u32 free_bits, end; + u32 used = find_next_zero_bit(buf, wend, wpos); + + if (used >= wend) { + if (*b_len < *prev_tail) { + *b_pos = wbit - *prev_tail; + *b_len = *prev_tail; + } + + *prev_tail = 0; + return -1; + } + + if (used > wpos) { + wpos = used; + if (*b_len < *prev_tail) { + *b_pos = wbit - *prev_tail; + *b_len = *prev_tail; + } + + *prev_tail = 0; + } + + /* + * Now we have a fragment [wpos, wend) staring with 0 + */ + end = wpos + to_alloc - *prev_tail; + free_bits = find_next_bit(buf, min(end, wend), wpos); + + free_len = *prev_tail + free_bits - wpos; + + if (*b_len < free_len) { + *b_pos = wbit + wpos - *prev_tail; + *b_len = free_len; + } + + if (free_len >= to_alloc) + return wbit + wpos - *prev_tail; + + if (free_bits >= wend) { + *prev_tail += free_bits - wpos; + return -1; + } + + wpos = free_bits + 1; + + *prev_tail = 0; + } + + return -1; +} + +/* + * wnd_close + * + * Frees all resources + */ +void wnd_close(struct wnd_bitmap *wnd) +{ + struct rb_node *node, *next; + + ntfs_free(wnd->free_bits); + run_close(&wnd->run); + + node = rb_first(&wnd->start_tree); + + while (node) { + next = rb_next(node); + rb_erase(node, &wnd->start_tree); + kmem_cache_free(ntfs_enode_cachep, + rb_entry(node, struct e_node, start.node)); + node = next; + } +} + +static struct rb_node *rb_lookup(struct rb_root *root, size_t v) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *r = NULL; + + while (*p) { + struct rb_node_key *k; + + k = rb_entry(*p, struct rb_node_key, node); + if (v < k->key) { + p = &(*p)->rb_left; + } else if (v > k->key) { + r = &k->node; + p = &(*p)->rb_right; + } else { + return &k->node; + } + } + + return r; +} + +/* + * rb_insert_count + * + * Helper function to insert special kind of 'count' tree + */ +static inline bool rb_insert_count(struct rb_root *root, struct e_node *e) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + size_t e_ckey = e->count.key; + size_t e_skey = e->start.key; + + while (*p) { + struct e_node *k = + rb_entry(parent = *p, struct e_node, count.node); + + if (e_ckey > k->count.key) { + p = &(*p)->rb_left; + } else if (e_ckey < k->count.key) { + p = &(*p)->rb_right; + } else if (e_skey < k->start.key) { + p = &(*p)->rb_left; + } else if (e_skey > k->start.key) { + p = &(*p)->rb_right; + } else { + WARN_ON(1); + return false; + } + } + + rb_link_node(&e->count.node, parent, p); + rb_insert_color(&e->count.node, root); + return true; +} + +/* + * inline bool rb_insert_start + * + * Helper function to insert special kind of 'start' tree + */ +static inline bool rb_insert_start(struct rb_root *root, struct e_node *e) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + size_t e_skey = e->start.key; + + while (*p) { + struct e_node *k; + + parent = *p; + + k = rb_entry(parent, struct e_node, start.node); + if (e_skey < k->start.key) { + p = &(*p)->rb_left; + } else if (e_skey > k->start.key) { + p = &(*p)->rb_right; + } else { + WARN_ON(1); + return false; + } + } + + rb_link_node(&e->start.node, parent, p); + rb_insert_color(&e->start.node, root); + return true; +} + +/* + * wnd_add_free_ext + * + * adds a new extent of free space + * build = 1 when building tree + */ +static void wnd_add_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len, + bool build) +{ + struct e_node *e, *e0 = NULL; + size_t ib, end_in = bit + len; + struct rb_node *n; + + if (build) { + /* Use extent_min to filter too short extents */ + if (wnd->count >= NTFS_MAX_WND_EXTENTS && + len <= wnd->extent_min) { + wnd->uptodated = -1; + return; + } + } else { + /* Try to find extent before 'bit' */ + n = rb_lookup(&wnd->start_tree, bit); + + if (!n) { + n = rb_first(&wnd->start_tree); + } else { + e = rb_entry(n, struct e_node, start.node); + n = rb_next(n); + if (e->start.key + e->count.key == bit) { + /* Remove left */ + bit = e->start.key; + len += e->count.key; + rb_erase(&e->start.node, &wnd->start_tree); + rb_erase(&e->count.node, &wnd->count_tree); + wnd->count -= 1; + e0 = e; + } + } + + while (n) { + size_t next_end; + + e = rb_entry(n, struct e_node, start.node); + next_end = e->start.key + e->count.key; + if (e->start.key > end_in) + break; + + /* Remove right */ + n = rb_next(n); + len += next_end - end_in; + end_in = next_end; + rb_erase(&e->start.node, &wnd->start_tree); + rb_erase(&e->count.node, &wnd->count_tree); + wnd->count -= 1; + + if (!e0) + e0 = e; + else + kmem_cache_free(ntfs_enode_cachep, e); + } + + if (wnd->uptodated != 1) { + /* Check bits before 'bit' */ + ib = wnd->zone_bit == wnd->zone_end || + bit < wnd->zone_end + ? 0 + : wnd->zone_end; + + while (bit > ib && wnd_is_free_hlp(wnd, bit - 1, 1)) { + bit -= 1; + len += 1; + } + + /* Check bits after 'end_in' */ + ib = wnd->zone_bit == wnd->zone_end || + end_in > wnd->zone_bit + ? wnd->nbits + : wnd->zone_bit; + + while (end_in < ib && wnd_is_free_hlp(wnd, end_in, 1)) { + end_in += 1; + len += 1; + } + } + } + /* Insert new fragment */ + if (wnd->count >= NTFS_MAX_WND_EXTENTS) { + if (e0) + kmem_cache_free(ntfs_enode_cachep, e0); + + wnd->uptodated = -1; + + /* Compare with smallest fragment */ + n = rb_last(&wnd->count_tree); + e = rb_entry(n, struct e_node, count.node); + if (len <= e->count.key) + goto out; /* Do not insert small fragments */ + + if (build) { + struct e_node *e2; + + n = rb_prev(n); + e2 = rb_entry(n, struct e_node, count.node); + /* smallest fragment will be 'e2->count.key' */ + wnd->extent_min = e2->count.key; + } + + /* Replace smallest fragment by new one */ + rb_erase(&e->start.node, &wnd->start_tree); + rb_erase(&e->count.node, &wnd->count_tree); + wnd->count -= 1; + } else { + e = e0 ? e0 : kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); + if (!e) { + wnd->uptodated = -1; + goto out; + } + + if (build && len <= wnd->extent_min) + wnd->extent_min = len; + } + e->start.key = bit; + e->count.key = len; + if (len > wnd->extent_max) + wnd->extent_max = len; + + rb_insert_start(&wnd->start_tree, e); + rb_insert_count(&wnd->count_tree, e); + wnd->count += 1; + +out:; +} + +/* + * wnd_remove_free_ext + * + * removes a run from the cached free space + */ +static void wnd_remove_free_ext(struct wnd_bitmap *wnd, size_t bit, size_t len) +{ + struct rb_node *n, *n3; + struct e_node *e, *e3; + size_t end_in = bit + len; + size_t end3, end, new_key, new_len, max_new_len; + + /* Try to find extent before 'bit' */ + n = rb_lookup(&wnd->start_tree, bit); + + if (!n) + return; + + e = rb_entry(n, struct e_node, start.node); + end = e->start.key + e->count.key; + + new_key = new_len = 0; + len = e->count.key; + + /* Range [bit,end_in) must be inside 'e' or outside 'e' and 'n' */ + if (e->start.key > bit) + ; + else if (end_in <= end) { + /* Range [bit,end_in) inside 'e' */ + new_key = end_in; + new_len = end - end_in; + len = bit - e->start.key; + } else if (bit > end) { + bool bmax = false; + + n3 = rb_next(n); + + while (n3) { + e3 = rb_entry(n3, struct e_node, start.node); + if (e3->start.key >= end_in) + break; + + if (e3->count.key == wnd->extent_max) + bmax = true; + + end3 = e3->start.key + e3->count.key; + if (end3 > end_in) { + e3->start.key = end_in; + rb_erase(&e3->count.node, &wnd->count_tree); + e3->count.key = end3 - end_in; + rb_insert_count(&wnd->count_tree, e3); + break; + } + + n3 = rb_next(n3); + rb_erase(&e3->start.node, &wnd->start_tree); + rb_erase(&e3->count.node, &wnd->count_tree); + wnd->count -= 1; + kmem_cache_free(ntfs_enode_cachep, e3); + } + if (!bmax) + return; + n3 = rb_first(&wnd->count_tree); + wnd->extent_max = + n3 ? rb_entry(n3, struct e_node, count.node)->count.key + : 0; + return; + } + + if (e->count.key != wnd->extent_max) { + ; + } else if (rb_prev(&e->count.node)) { + ; + } else { + n3 = rb_next(&e->count.node); + max_new_len = len > new_len ? len : new_len; + if (!n3) { + wnd->extent_max = max_new_len; + } else { + e3 = rb_entry(n3, struct e_node, count.node); + wnd->extent_max = max(e3->count.key, max_new_len); + } + } + + if (!len) { + if (new_len) { + e->start.key = new_key; + rb_erase(&e->count.node, &wnd->count_tree); + e->count.key = new_len; + rb_insert_count(&wnd->count_tree, e); + } else { + rb_erase(&e->start.node, &wnd->start_tree); + rb_erase(&e->count.node, &wnd->count_tree); + wnd->count -= 1; + kmem_cache_free(ntfs_enode_cachep, e); + } + goto out; + } + rb_erase(&e->count.node, &wnd->count_tree); + e->count.key = len; + rb_insert_count(&wnd->count_tree, e); + + if (!new_len) + goto out; + + if (wnd->count >= NTFS_MAX_WND_EXTENTS) { + wnd->uptodated = -1; + + /* Get minimal extent */ + e = rb_entry(rb_last(&wnd->count_tree), struct e_node, + count.node); + if (e->count.key > new_len) + goto out; + + /* Replace minimum */ + rb_erase(&e->start.node, &wnd->start_tree); + rb_erase(&e->count.node, &wnd->count_tree); + wnd->count -= 1; + } else { + e = kmem_cache_alloc(ntfs_enode_cachep, GFP_ATOMIC); + if (!e) + wnd->uptodated = -1; + } + + if (e) { + e->start.key = new_key; + e->count.key = new_len; + rb_insert_start(&wnd->start_tree, e); + rb_insert_count(&wnd->count_tree, e); + wnd->count += 1; + } + +out: + if (!wnd->count && 1 != wnd->uptodated) + wnd_rescan(wnd); +} + +/* + * wnd_rescan + * + * Scan all bitmap. used while initialization. + */ +static int wnd_rescan(struct wnd_bitmap *wnd) +{ + int err = 0; + size_t prev_tail = 0; + struct super_block *sb = wnd->sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + u64 lbo, len = 0; + u32 blocksize = sb->s_blocksize; + u8 cluster_bits = sbi->cluster_bits; + u32 wbits = 8 * sb->s_blocksize; + u32 used, frb; + const ulong *buf; + size_t wpos, wbit, iw, vbo; + struct buffer_head *bh = NULL; + CLST lcn, clen; + + wnd->uptodated = 0; + wnd->extent_max = 0; + wnd->extent_min = MINUS_ONE_T; + wnd->total_zeroes = 0; + + vbo = 0; + + for (iw = 0; iw < wnd->nwnd; iw++) { + if (iw + 1 == wnd->nwnd) + wbits = wnd->bits_last; + + if (wnd->inited) { + if (!wnd->free_bits[iw]) { + /* all ones */ + if (prev_tail) { + wnd_add_free_ext(wnd, + vbo * 8 - prev_tail, + prev_tail, true); + prev_tail = 0; + } + goto next_wnd; + } + if (wbits == wnd->free_bits[iw]) { + /* all zeroes */ + prev_tail += wbits; + wnd->total_zeroes += wbits; + goto next_wnd; + } + } + + if (!len) { + u32 off = vbo & sbi->cluster_mask; + + if (!run_lookup_entry(&wnd->run, vbo >> cluster_bits, + &lcn, &clen, NULL)) { + err = -ENOENT; + goto out; + } + + lbo = ((u64)lcn << cluster_bits) + off; + len = ((u64)clen << cluster_bits) - off; + } + + bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); + if (!bh) { + err = -EIO; + goto out; + } + + buf = (ulong *)bh->b_data; + + used = __bitmap_weight(buf, wbits); + if (used < wbits) { + frb = wbits - used; + wnd->free_bits[iw] = frb; + wnd->total_zeroes += frb; + } + + wpos = 0; + wbit = vbo * 8; + + if (wbit + wbits > wnd->nbits) + wbits = wnd->nbits - wbit; + + do { + used = find_next_zero_bit(buf, wbits, wpos); + + if (used > wpos && prev_tail) { + wnd_add_free_ext(wnd, wbit + wpos - prev_tail, + prev_tail, true); + prev_tail = 0; + } + + wpos = used; + + if (wpos >= wbits) { + /* No free blocks */ + prev_tail = 0; + break; + } + + frb = find_next_bit(buf, wbits, wpos); + if (frb >= wbits) { + /* keep last free block */ + prev_tail += frb - wpos; + break; + } + + wnd_add_free_ext(wnd, wbit + wpos - prev_tail, + frb + prev_tail - wpos, true); + + /* Skip free block and first '1' */ + wpos = frb + 1; + /* Reset previous tail */ + prev_tail = 0; + } while (wpos < wbits); + +next_wnd: + + if (bh) + put_bh(bh); + bh = NULL; + + vbo += blocksize; + if (len) { + len -= blocksize; + lbo += blocksize; + } + } + + /* Add last block */ + if (prev_tail) + wnd_add_free_ext(wnd, wnd->nbits - prev_tail, prev_tail, true); + + /* + * Before init cycle wnd->uptodated was 0 + * If any errors or limits occurs while initialization then + * wnd->uptodated will be -1 + * If 'uptodated' is still 0 then Tree is really updated + */ + if (!wnd->uptodated) + wnd->uptodated = 1; + + if (wnd->zone_bit != wnd->zone_end) { + size_t zlen = wnd->zone_end - wnd->zone_bit; + + wnd->zone_end = wnd->zone_bit; + wnd_zone_set(wnd, wnd->zone_bit, zlen); + } + +out: + return err; +} + +/* + * wnd_init + */ +int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits) +{ + int err; + u32 blocksize = sb->s_blocksize; + u32 wbits = blocksize * 8; + + init_rwsem(&wnd->rw_lock); + + wnd->sb = sb; + wnd->nbits = nbits; + wnd->total_zeroes = nbits; + wnd->extent_max = MINUS_ONE_T; + wnd->zone_bit = wnd->zone_end = 0; + wnd->nwnd = bytes_to_block(sb, bitmap_size(nbits)); + wnd->bits_last = nbits & (wbits - 1); + if (!wnd->bits_last) + wnd->bits_last = wbits; + + wnd->free_bits = ntfs_zalloc(wnd->nwnd * sizeof(u16)); + if (!wnd->free_bits) + return -ENOMEM; + + err = wnd_rescan(wnd); + if (err) + return err; + + wnd->inited = true; + + return 0; +} + +/* + * wnd_map + * + * call sb_bread for requested window + */ +static struct buffer_head *wnd_map(struct wnd_bitmap *wnd, size_t iw) +{ + size_t vbo; + CLST lcn, clen; + struct super_block *sb = wnd->sb; + struct ntfs_sb_info *sbi; + struct buffer_head *bh; + u64 lbo; + + sbi = sb->s_fs_info; + vbo = (u64)iw << sb->s_blocksize_bits; + + if (!run_lookup_entry(&wnd->run, vbo >> sbi->cluster_bits, &lcn, &clen, + NULL)) { + return ERR_PTR(-ENOENT); + } + + lbo = ((u64)lcn << sbi->cluster_bits) + (vbo & sbi->cluster_mask); + + bh = ntfs_bread(wnd->sb, lbo >> sb->s_blocksize_bits); + if (!bh) + return ERR_PTR(-EIO); + + return bh; +} + +/* + * wnd_set_free + * + * Marks the bits range from bit to bit + bits as free + */ +int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) +{ + int err = 0; + struct super_block *sb = wnd->sb; + size_t bits0 = bits; + u32 wbits = 8 * sb->s_blocksize; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbit = bit & (wbits - 1); + struct buffer_head *bh; + + while (iw < wnd->nwnd && bits) { + u32 tail, op; + ulong *buf; + + if (iw + 1 == wnd->nwnd) + wbits = wnd->bits_last; + + tail = wbits - wbit; + op = tail < bits ? tail : bits; + + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + break; + } + + buf = (ulong *)bh->b_data; + + lock_buffer(bh); + + __bitmap_clear(buf, wbit, op); + + wnd->free_bits[iw] += op; + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + put_bh(bh); + + wnd->total_zeroes += op; + bits -= op; + wbit = 0; + iw += 1; + } + + wnd_add_free_ext(wnd, bit, bits0, false); + + return err; +} + +/* + * wnd_set_used + * + * Marks the bits range from bit to bit + bits as used + */ +int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) +{ + int err = 0; + struct super_block *sb = wnd->sb; + size_t bits0 = bits; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); + struct buffer_head *bh; + + while (iw < wnd->nwnd && bits) { + u32 tail, op; + ulong *buf; + + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + + tail = wbits - wbit; + op = tail < bits ? tail : bits; + + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + break; + } + buf = (ulong *)bh->b_data; + + lock_buffer(bh); + + __bitmap_set(buf, wbit, op); + wnd->free_bits[iw] -= op; + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + put_bh(bh); + + wnd->total_zeroes -= op; + bits -= op; + wbit = 0; + iw += 1; + } + + if (!RB_EMPTY_ROOT(&wnd->start_tree)) + wnd_remove_free_ext(wnd, bit, bits0); + + return err; +} + +/* + * wnd_is_free_hlp + * + * Returns true if all clusters [bit, bit+bits) are free (bitmap only) + */ +static bool wnd_is_free_hlp(struct wnd_bitmap *wnd, size_t bit, size_t bits) +{ + struct super_block *sb = wnd->sb; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); + + while (iw < wnd->nwnd && bits) { + u32 tail, op; + + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + + tail = wbits - wbit; + op = tail < bits ? tail : bits; + + if (wbits != wnd->free_bits[iw]) { + bool ret; + struct buffer_head *bh = wnd_map(wnd, iw); + + if (IS_ERR(bh)) + return false; + + ret = are_bits_clear((ulong *)bh->b_data, wbit, op); + + put_bh(bh); + if (!ret) + return false; + } + + bits -= op; + wbit = 0; + iw += 1; + } + + return true; +} + +/* + * wnd_is_free + * + * Returns true if all clusters [bit, bit+bits) are free + */ +bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits) +{ + bool ret; + struct rb_node *n; + size_t end; + struct e_node *e; + + if (RB_EMPTY_ROOT(&wnd->start_tree)) + goto use_wnd; + + n = rb_lookup(&wnd->start_tree, bit); + if (!n) + goto use_wnd; + + e = rb_entry(n, struct e_node, start.node); + + end = e->start.key + e->count.key; + + if (bit < end && bit + bits <= end) + return true; + +use_wnd: + ret = wnd_is_free_hlp(wnd, bit, bits); + + return ret; +} + +/* + * wnd_is_used + * + * Returns true if all clusters [bit, bit+bits) are used + */ +bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits) +{ + bool ret = false; + struct super_block *sb = wnd->sb; + size_t iw = bit >> (sb->s_blocksize_bits + 3); + u32 wbits = 8 * sb->s_blocksize; + u32 wbit = bit & (wbits - 1); + size_t end; + struct rb_node *n; + struct e_node *e; + + if (RB_EMPTY_ROOT(&wnd->start_tree)) + goto use_wnd; + + end = bit + bits; + n = rb_lookup(&wnd->start_tree, end - 1); + if (!n) + goto use_wnd; + + e = rb_entry(n, struct e_node, start.node); + if (e->start.key + e->count.key > bit) + return false; + +use_wnd: + while (iw < wnd->nwnd && bits) { + u32 tail, op; + + if (unlikely(iw + 1 == wnd->nwnd)) + wbits = wnd->bits_last; + + tail = wbits - wbit; + op = tail < bits ? tail : bits; + + if (wnd->free_bits[iw]) { + bool ret; + struct buffer_head *bh = wnd_map(wnd, iw); + + if (IS_ERR(bh)) + goto out; + + ret = are_bits_set((ulong *)bh->b_data, wbit, op); + put_bh(bh); + if (!ret) + goto out; + } + + bits -= op; + wbit = 0; + iw += 1; + } + ret = true; + +out: + return ret; +} + +/* + * wnd_find + * - flags - BITMAP_FIND_XXX flags + * + * looks for free space + * Returns 0 if not found + */ +size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, + size_t flags, size_t *allocated) +{ + struct super_block *sb; + u32 wbits, wpos, wzbit, wzend; + size_t fnd, max_alloc, b_len, b_pos; + size_t iw, prev_tail, nwnd, wbit, ebit, zbit, zend; + size_t to_alloc0 = to_alloc; + const ulong *buf; + const struct e_node *e; + const struct rb_node *pr, *cr; + u8 log2_bits; + bool fbits_valid; + struct buffer_head *bh; + + /* fast checking for available free space */ + if (flags & BITMAP_FIND_FULL) { + size_t zeroes = wnd_zeroes(wnd); + + zeroes -= wnd->zone_end - wnd->zone_bit; + if (zeroes < to_alloc0) + goto no_space; + + if (to_alloc0 > wnd->extent_max) + goto no_space; + } else { + if (to_alloc > wnd->extent_max) + to_alloc = wnd->extent_max; + } + + if (wnd->zone_bit <= hint && hint < wnd->zone_end) + hint = wnd->zone_end; + + max_alloc = wnd->nbits; + b_len = b_pos = 0; + + if (hint >= max_alloc) + hint = 0; + + if (RB_EMPTY_ROOT(&wnd->start_tree)) { + if (wnd->uptodated == 1) { + /* extents tree is updated -> no free space */ + goto no_space; + } + goto scan_bitmap; + } + + e = NULL; + if (!hint) + goto allocate_biggest; + + /* Use hint: enumerate extents by start >= hint */ + pr = NULL; + cr = wnd->start_tree.rb_node; + + for (;;) { + e = rb_entry(cr, struct e_node, start.node); + + if (e->start.key == hint) + break; + + if (e->start.key < hint) { + pr = cr; + cr = cr->rb_right; + if (!cr) + break; + continue; + } + + cr = cr->rb_left; + if (!cr) { + e = pr ? rb_entry(pr, struct e_node, start.node) : NULL; + break; + } + } + + if (!e) + goto allocate_biggest; + + if (e->start.key + e->count.key > hint) { + /* We have found extension with 'hint' inside */ + size_t len = e->start.key + e->count.key - hint; + + if (len >= to_alloc && hint + to_alloc <= max_alloc) { + fnd = hint; + goto found; + } + + if (!(flags & BITMAP_FIND_FULL)) { + if (len > to_alloc) + len = to_alloc; + + if (hint + len <= max_alloc) { + fnd = hint; + to_alloc = len; + goto found; + } + } + } + +allocate_biggest: + /* Allocate from biggest free extent */ + e = rb_entry(rb_first(&wnd->count_tree), struct e_node, count.node); + if (e->count.key != wnd->extent_max) + wnd->extent_max = e->count.key; + + if (e->count.key < max_alloc) { + if (e->count.key >= to_alloc) { + ; + } else if (flags & BITMAP_FIND_FULL) { + if (e->count.key < to_alloc0) { + /* Biggest free block is less then requested */ + goto no_space; + } + to_alloc = e->count.key; + } else if (-1 != wnd->uptodated) { + to_alloc = e->count.key; + } else { + /* Check if we can use more bits */ + size_t op, max_check; + struct rb_root start_tree; + + memcpy(&start_tree, &wnd->start_tree, + sizeof(struct rb_root)); + memset(&wnd->start_tree, 0, sizeof(struct rb_root)); + + max_check = e->start.key + to_alloc; + if (max_check > max_alloc) + max_check = max_alloc; + for (op = e->start.key + e->count.key; op < max_check; + op++) { + if (!wnd_is_free(wnd, op, 1)) + break; + } + memcpy(&wnd->start_tree, &start_tree, + sizeof(struct rb_root)); + to_alloc = op - e->start.key; + } + + /* Prepare to return */ + fnd = e->start.key; + if (e->start.key + to_alloc > max_alloc) + to_alloc = max_alloc - e->start.key; + goto found; + } + + if (wnd->uptodated == 1) { + /* extents tree is updated -> no free space */ + goto no_space; + } + + b_len = e->count.key; + b_pos = e->start.key; + +scan_bitmap: + sb = wnd->sb; + log2_bits = sb->s_blocksize_bits + 3; + + /* At most two ranges [hint, max_alloc) + [0, hint) */ +Again: + + /* TODO: optimize request for case nbits > wbits */ + iw = hint >> log2_bits; + wbits = sb->s_blocksize * 8; + wpos = hint & (wbits - 1); + prev_tail = 0; + fbits_valid = true; + + if (max_alloc == wnd->nbits) { + nwnd = wnd->nwnd; + } else { + size_t t = max_alloc + wbits - 1; + + nwnd = likely(t > max_alloc) ? (t >> log2_bits) : wnd->nwnd; + } + + /* Enumerate all windows */ + for (; iw < nwnd; iw++) { + wbit = iw << log2_bits; + + if (!wnd->free_bits[iw]) { + if (prev_tail > b_len) { + b_pos = wbit - prev_tail; + b_len = prev_tail; + } + + /* Skip full used window */ + prev_tail = 0; + wpos = 0; + continue; + } + + if (unlikely(iw + 1 == nwnd)) { + if (max_alloc == wnd->nbits) { + wbits = wnd->bits_last; + } else { + size_t t = max_alloc & (wbits - 1); + + if (t) { + wbits = t; + fbits_valid = false; + } + } + } + + if (wnd->zone_end > wnd->zone_bit) { + ebit = wbit + wbits; + zbit = max(wnd->zone_bit, wbit); + zend = min(wnd->zone_end, ebit); + + /* Here we have a window [wbit, ebit) and zone [zbit, zend) */ + if (zend <= zbit) { + /* Zone does not overlap window */ + } else { + wzbit = zbit - wbit; + wzend = zend - wbit; + + /* Zone overlaps window */ + if (wnd->free_bits[iw] == wzend - wzbit) { + prev_tail = 0; + wpos = 0; + continue; + } + + /* Scan two ranges window: [wbit, zbit) and [zend, ebit) */ + bh = wnd_map(wnd, iw); + + if (IS_ERR(bh)) { + /* TODO: error */ + prev_tail = 0; + wpos = 0; + continue; + } + + buf = (ulong *)bh->b_data; + + /* Scan range [wbit, zbit) */ + if (wpos < wzbit) { + /* Scan range [wpos, zbit) */ + fnd = wnd_scan(buf, wbit, wpos, wzbit, + to_alloc, &prev_tail, + &b_pos, &b_len); + if (fnd != MINUS_ONE_T) { + put_bh(bh); + goto found; + } + } + + prev_tail = 0; + + /* Scan range [zend, ebit) */ + if (wzend < wbits) { + fnd = wnd_scan(buf, wbit, + max(wzend, wpos), wbits, + to_alloc, &prev_tail, + &b_pos, &b_len); + if (fnd != MINUS_ONE_T) { + put_bh(bh); + goto found; + } + } + + wpos = 0; + put_bh(bh); + continue; + } + } + + /* Current window does not overlap zone */ + if (!wpos && fbits_valid && wnd->free_bits[iw] == wbits) { + /* window is empty */ + if (prev_tail + wbits >= to_alloc) { + fnd = wbit + wpos - prev_tail; + goto found; + } + + /* Increase 'prev_tail' and process next window */ + prev_tail += wbits; + wpos = 0; + continue; + } + + /* read window */ + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { + // TODO: error + prev_tail = 0; + wpos = 0; + continue; + } + + buf = (ulong *)bh->b_data; + + /* Scan range [wpos, eBits) */ + fnd = wnd_scan(buf, wbit, wpos, wbits, to_alloc, &prev_tail, + &b_pos, &b_len); + put_bh(bh); + if (fnd != MINUS_ONE_T) + goto found; + } + + if (b_len < prev_tail) { + /* The last fragment */ + b_len = prev_tail; + b_pos = max_alloc - prev_tail; + } + + if (hint) { + /* + * We have scanned range [hint max_alloc) + * Prepare to scan range [0 hint + to_alloc) + */ + size_t nextmax = hint + to_alloc; + + if (likely(nextmax >= hint) && nextmax < max_alloc) + max_alloc = nextmax; + hint = 0; + goto Again; + } + + if (!b_len) + goto no_space; + + wnd->extent_max = b_len; + + if (flags & BITMAP_FIND_FULL) + goto no_space; + + fnd = b_pos; + to_alloc = b_len; + +found: + if (flags & BITMAP_FIND_MARK_AS_USED) { + /* TODO optimize remove extent (pass 'e'?) */ + if (wnd_set_used(wnd, fnd, to_alloc)) + goto no_space; + } else if (wnd->extent_max != MINUS_ONE_T && + to_alloc > wnd->extent_max) { + wnd->extent_max = to_alloc; + } + + *allocated = fnd; + return to_alloc; + +no_space: + return 0; +} + +/* + * wnd_extend + * + * Extend bitmap ($MFT bitmap) + */ +int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits) +{ + int err; + struct super_block *sb = wnd->sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + u32 blocksize = sb->s_blocksize; + u32 wbits = blocksize * 8; + u32 b0, new_last; + size_t bits, iw, new_wnd; + size_t old_bits = wnd->nbits; + u16 *new_free; + + if (new_bits <= old_bits) + return -EINVAL; + + /* align to 8 byte boundary */ + new_wnd = bytes_to_block(sb, bitmap_size(new_bits)); + new_last = new_bits & (wbits - 1); + if (!new_last) + new_last = wbits; + + if (new_wnd != wnd->nwnd) { + new_free = ntfs_malloc(new_wnd * sizeof(u16)); + if (!new_free) + return -ENOMEM; + + if (new_free != wnd->free_bits) + memcpy(new_free, wnd->free_bits, + wnd->nwnd * sizeof(short)); + memset(new_free + wnd->nwnd, 0, + (new_wnd - wnd->nwnd) * sizeof(short)); + ntfs_free(wnd->free_bits); + wnd->free_bits = new_free; + } + + /* Zero bits [old_bits,new_bits) */ + bits = new_bits - old_bits; + b0 = old_bits & (wbits - 1); + + for (iw = old_bits >> (sb->s_blocksize_bits + 3); bits; iw += 1) { + u32 op; + size_t frb; + u64 vbo, lbo, bytes; + struct buffer_head *bh; + ulong *buf; + + if (iw + 1 == new_wnd) + wbits = new_last; + + op = b0 + bits > wbits ? wbits - b0 : bits; + vbo = (u64)iw * blocksize; + + err = ntfs_vbo_to_lbo(sbi, &wnd->run, vbo, &lbo, &bytes); + if (err) + break; + + bh = ntfs_bread(sb, lbo >> sb->s_blocksize_bits); + if (!bh) + return -EIO; + + lock_buffer(bh); + buf = (ulong *)bh->b_data; + + __bitmap_clear(buf, b0, blocksize * 8 - b0); + frb = wbits - __bitmap_weight(buf, wbits); + wnd->total_zeroes += frb - wnd->free_bits[iw]; + wnd->free_bits[iw] = frb; + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + /*err = sync_dirty_buffer(bh);*/ + + b0 = 0; + bits -= op; + } + + wnd->nbits = new_bits; + wnd->nwnd = new_wnd; + wnd->bits_last = new_last; + + wnd_add_free_ext(wnd, old_bits, new_bits - old_bits, false); + + return 0; +} + +/* + * wnd_zone_set + */ +void wnd_zone_set(struct wnd_bitmap *wnd, size_t lcn, size_t len) +{ + size_t zlen; + + zlen = wnd->zone_end - wnd->zone_bit; + if (zlen) + wnd_add_free_ext(wnd, wnd->zone_bit, zlen, false); + + if (!RB_EMPTY_ROOT(&wnd->start_tree) && len) + wnd_remove_free_ext(wnd, lcn, len); + + wnd->zone_bit = lcn; + wnd->zone_end = lcn + len; +} + +int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range) +{ + int err = 0; + struct super_block *sb = sbi->sb; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + u32 wbits = 8 * sb->s_blocksize; + CLST len = 0, lcn = 0, done = 0; + CLST minlen = bytes_to_cluster(sbi, range->minlen); + CLST lcn_from = bytes_to_cluster(sbi, range->start); + size_t iw = lcn_from >> (sb->s_blocksize_bits + 3); + u32 wbit = lcn_from & (wbits - 1); + const ulong *buf; + CLST lcn_to; + + if (!minlen) + minlen = 1; + + if (range->len == (u64)-1) + lcn_to = wnd->nbits; + else + lcn_to = bytes_to_cluster(sbi, range->start + range->len); + + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + + for (; iw < wnd->nbits; iw++, wbit = 0) { + CLST lcn_wnd = iw * wbits; + struct buffer_head *bh; + + if (lcn_wnd > lcn_to) + break; + + if (!wnd->free_bits[iw]) + continue; + + if (iw + 1 == wnd->nwnd) + wbits = wnd->bits_last; + + if (lcn_wnd + wbits > lcn_to) + wbits = lcn_to - lcn_wnd; + + bh = wnd_map(wnd, iw); + if (IS_ERR(bh)) { + err = PTR_ERR(bh); + break; + } + + buf = (ulong *)bh->b_data; + + for (; wbit < wbits; wbit++) { + if (!test_bit(wbit, buf)) { + if (!len) + lcn = lcn_wnd + wbit; + len += 1; + continue; + } + if (len >= minlen) { + err = ntfs_discard(sbi, lcn, len); + if (err) + goto out; + done += len; + } + len = 0; + } + put_bh(bh); + } + + /* Process the last fragment */ + if (len >= minlen) { + err = ntfs_discard(sbi, lcn, len); + if (err) + goto out; + done += len; + } + +out: + range->len = (u64)done << sbi->cluster_bits; + + up_read(&wnd->rw_lock); + + return err; +} diff --git a/fs/ntfs3/debug.h b/fs/ntfs3/debug.h new file mode 100644 index 000000000000..dfaa4c79dc6d --- /dev/null +++ b/fs/ntfs3/debug.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * useful functions for debuging + */ + +// clang-format off +#ifndef Add2Ptr +#define Add2Ptr(P, I) ((void *)((u8 *)(P) + (I))) +#define PtrOffset(B, O) ((size_t)((size_t)(O) - (size_t)(B))) +#endif + +#define QuadAlign(n) (((n) + 7u) & (~7u)) +#define IsQuadAligned(n) (!((size_t)(n)&7u)) +#define Quad2Align(n) (((n) + 15u) & (~15u)) +#define IsQuad2Aligned(n) (!((size_t)(n)&15u)) +#define Quad4Align(n) (((n) + 31u) & (~31u)) +#define IsSizeTAligned(n) (!((size_t)(n) & (sizeof(size_t) - 1))) +#define DwordAlign(n) (((n) + 3u) & (~3u)) +#define IsDwordAligned(n) (!((size_t)(n)&3u)) +#define WordAlign(n) (((n) + 1u) & (~1u)) +#define IsWordAligned(n) (!((size_t)(n)&1u)) + +#ifdef CONFIG_PRINTK +__printf(2, 3) +void ntfs_printk(const struct super_block *sb, const char *fmt, ...); +__printf(2, 3) +void ntfs_inode_printk(struct inode *inode, const char *fmt, ...); +#else +static inline __printf(2, 3) +void ntfs_printk(const struct super_block *sb, const char *fmt, ...) +{ +} + +static inline __printf(2, 3) +void ntfs_inode_printk(struct inode *inode, const char *fmt, ...) +{ +} +#endif + +/* + * Logging macros ( thanks Joe Perches for implementation ) + */ + +#define ntfs_err(sb, fmt, ...) ntfs_printk(sb, KERN_ERR fmt, ##__VA_ARGS__) +#define ntfs_warn(sb, fmt, ...) ntfs_printk(sb, KERN_WARNING fmt, ##__VA_ARGS__) +#define ntfs_info(sb, fmt, ...) ntfs_printk(sb, KERN_INFO fmt, ##__VA_ARGS__) +#define ntfs_notice(sb, fmt, ...) \ + ntfs_printk(sb, KERN_NOTICE fmt, ##__VA_ARGS__) + +#define ntfs_inode_err(inode, fmt, ...) \ + ntfs_inode_printk(inode, KERN_ERR fmt, ##__VA_ARGS__) +#define ntfs_inode_warn(inode, fmt, ...) \ + ntfs_inode_printk(inode, KERN_WARNING fmt, ##__VA_ARGS__) + +#define ntfs_malloc(s) kmalloc(s, GFP_NOFS) +#define ntfs_zalloc(s) kzalloc(s, GFP_NOFS) +#define ntfs_vmalloc(s) kvmalloc(s, GFP_KERNEL) +#define ntfs_free(p) kfree(p) +#define ntfs_vfree(p) kvfree(p) +#define ntfs_memdup(src, len) kmemdup(src, len, GFP_NOFS) +// clang-format on diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c new file mode 100644 index 000000000000..9ec6012c405b --- /dev/null +++ b/fs/ntfs3/dir.c @@ -0,0 +1,594 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * directory handling functions for ntfs-based filesystems + * + */ +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * Convert little endian utf16 to nls string + */ +int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const struct le_str *uni, + u8 *buf, int buf_len) +{ + int ret, uni_len, warn; + const __le16 *ip; + u8 *op; + struct nls_table *nls = sbi->options.nls; + + static_assert(sizeof(wchar_t) == sizeof(__le16)); + + if (!nls) { + /* utf16 -> utf8 */ + ret = utf16s_to_utf8s((wchar_t *)uni->name, uni->len, + UTF16_LITTLE_ENDIAN, buf, buf_len); + buf[ret] = '\0'; + return ret; + } + + ip = uni->name; + op = buf; + uni_len = uni->len; + warn = 0; + + while (uni_len--) { + u16 ec; + int charlen; + char dump[5]; + + if (buf_len < NLS_MAX_CHARSET_SIZE) { + ntfs_warn(sbi->sb, + "filename was truncated while converting."); + break; + } + + ec = le16_to_cpu(*ip++); + charlen = nls->uni2char(ec, op, buf_len); + + if (charlen > 0) { + op += charlen; + buf_len -= charlen; + continue; + } + + *op++ = '_'; + buf_len -= 1; + if (warn) + continue; + + warn = 1; + hex_byte_pack(&dump[0], ec >> 8); + hex_byte_pack(&dump[2], ec); + dump[4] = 0; + + ntfs_err(sbi->sb, "failed to convert \"%s\" to %s", dump, + nls->charset); + } + + *op = '\0'; + return op - buf; +} + +// clang-format off +#define PLANE_SIZE 0x00010000 + +#define SURROGATE_PAIR 0x0000d800 +#define SURROGATE_LOW 0x00000400 +#define SURROGATE_BITS 0x000003ff +// clang-format on + +/* + * modified version of put_utf16 from fs/nls/nls_base.c + * is sparse warnings free + */ +static inline void put_utf16(wchar_t *s, unsigned int c, + enum utf16_endian endian) +{ + static_assert(sizeof(wchar_t) == sizeof(__le16)); + static_assert(sizeof(wchar_t) == sizeof(__be16)); + + switch (endian) { + default: + *s = (wchar_t)c; + break; + case UTF16_LITTLE_ENDIAN: + *(__le16 *)s = __cpu_to_le16(c); + break; + case UTF16_BIG_ENDIAN: + *(__be16 *)s = __cpu_to_be16(c); + break; + } +} + +/* + * modified version of 'utf8s_to_utf16s' allows to + * detect -ENAMETOOLONG without writing out of expected maximum + */ +static int _utf8s_to_utf16s(const u8 *s, int inlen, enum utf16_endian endian, + wchar_t *pwcs, int maxout) +{ + u16 *op; + int size; + unicode_t u; + + op = pwcs; + while (inlen > 0 && *s) { + if (*s & 0x80) { + size = utf8_to_utf32(s, inlen, &u); + if (size < 0) + return -EINVAL; + s += size; + inlen -= size; + + if (u >= PLANE_SIZE) { + if (maxout < 2) + return -ENAMETOOLONG; + + u -= PLANE_SIZE; + put_utf16(op++, + SURROGATE_PAIR | + ((u >> 10) & SURROGATE_BITS), + endian); + put_utf16(op++, + SURROGATE_PAIR | SURROGATE_LOW | + (u & SURROGATE_BITS), + endian); + maxout -= 2; + } else { + if (maxout < 1) + return -ENAMETOOLONG; + + put_utf16(op++, u, endian); + maxout--; + } + } else { + if (maxout < 1) + return -ENAMETOOLONG; + + put_utf16(op++, *s++, endian); + inlen--; + maxout--; + } + } + return op - pwcs; +} + +/* + * Convert input string to utf16 + * + * name, name_len - input name + * uni, max_ulen - destination memory + * endian - endian of target utf16 string + * + * This function is called: + * - to create ntfs name + * - to create symlink + * + * returns utf16 string length or error (if negative) + */ +int ntfs_nls_to_utf16(struct ntfs_sb_info *sbi, const u8 *name, u32 name_len, + struct cpu_str *uni, u32 max_ulen, + enum utf16_endian endian) +{ + int ret, slen; + const u8 *end; + struct nls_table *nls = sbi->options.nls; + u16 *uname = uni->name; + + static_assert(sizeof(wchar_t) == sizeof(u16)); + + if (!nls) { + /* utf8 -> utf16 */ + ret = _utf8s_to_utf16s(name, name_len, endian, uname, max_ulen); + uni->len = ret; + return ret; + } + + for (ret = 0, end = name + name_len; name < end; ret++, name += slen) { + if (ret >= max_ulen) + return -ENAMETOOLONG; + + slen = nls->char2uni(name, end - name, uname + ret); + if (!slen) + return -EINVAL; + if (slen < 0) + return slen; + } + +#ifdef __BIG_ENDIAN + if (endian == UTF16_LITTLE_ENDIAN) { + int i = ret; + + while (i--) { + __cpu_to_le16s(uname); + uname++; + } + } +#else + if (endian == UTF16_BIG_ENDIAN) { + int i = ret; + + while (i--) { + __cpu_to_be16s(uname); + uname++; + } + } +#endif + + uni->len = ret; + return ret; +} + +/* helper function */ +struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, + struct ntfs_fnd *fnd) +{ + int err = 0; + struct super_block *sb = dir->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(dir); + struct NTFS_DE *e; + int diff; + struct inode *inode = NULL; + struct ntfs_fnd *fnd_a = NULL; + + if (!fnd) { + fnd_a = fnd_get(); + if (!fnd_a) { + err = -ENOMEM; + goto out; + } + fnd = fnd_a; + } + + err = indx_find(&ni->dir, ni, NULL, uni, 0, sbi, &diff, &e, fnd); + + if (err) + goto out; + + if (diff) { + err = -ENOENT; + goto out; + } + + inode = ntfs_iget5(sb, &e->ref, uni); + if (!IS_ERR(inode) && is_bad_inode(inode)) { + iput(inode); + err = -EINVAL; + } +out: + fnd_put(fnd_a); + + return err == -ENOENT ? NULL : err ? ERR_PTR(err) : inode; +} + +static inline int ntfs_filldir(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, + const struct NTFS_DE *e, u8 *name, + struct dir_context *ctx) +{ + const struct ATTR_FILE_NAME *fname; + unsigned long ino; + int name_len; + u32 dt_type; + + fname = Add2Ptr(e, sizeof(struct NTFS_DE)); + + if (fname->type == FILE_NAME_DOS) + return 0; + + if (!mi_is_ref(&ni->mi, &fname->home)) + return 0; + + ino = ino_get(&e->ref); + + if (ino == MFT_REC_ROOT) + return 0; + + /* Skip meta files ( unless option to show metafiles is set ) */ + if (!sbi->options.showmeta && ntfs_is_meta_file(sbi, ino)) + return 0; + + if (sbi->options.nohidden && (fname->dup.fa & FILE_ATTRIBUTE_HIDDEN)) + return 0; + + name_len = ntfs_utf16_to_nls(sbi, (struct le_str *)&fname->name_len, + name, PATH_MAX); + if (name_len <= 0) { + ntfs_warn(sbi->sb, "failed to convert name for inode %lx.", + ino); + return 0; + } + + dt_type = (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) ? DT_DIR : DT_REG; + + return !dir_emit(ctx, (s8 *)name, name_len, ino, dt_type); +} + +/* + * ntfs_read_hdr + * + * helper function 'ntfs_readdir' + */ +static int ntfs_read_hdr(struct ntfs_sb_info *sbi, struct ntfs_inode *ni, + const struct INDEX_HDR *hdr, u64 vbo, u64 pos, + u8 *name, struct dir_context *ctx) +{ + int err; + const struct NTFS_DE *e; + u32 e_size; + u32 end = le32_to_cpu(hdr->used); + u32 off = le32_to_cpu(hdr->de_off); + + for (;; off += e_size) { + if (off + sizeof(struct NTFS_DE) > end) + return -1; + + e = Add2Ptr(hdr, off); + e_size = le16_to_cpu(e->size); + if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) + return -1; + + if (de_is_last(e)) + return 0; + + /* Skip already enumerated*/ + if (vbo + off < pos) + continue; + + if (le16_to_cpu(e->key_size) < SIZEOF_ATTRIBUTE_FILENAME) + return -1; + + ctx->pos = vbo + off; + + /* Submit the name to the filldir callback. */ + err = ntfs_filldir(sbi, ni, e, name, ctx); + if (err) + return err; + } +} + +/* + * file_operations::iterate_shared + * + * Use non sorted enumeration. + * We have an example of broken volume where sorted enumeration + * counts each name twice + */ +static int ntfs_readdir(struct file *file, struct dir_context *ctx) +{ + const struct INDEX_ROOT *root; + u64 vbo; + size_t bit; + loff_t eod; + int err = 0; + struct inode *dir = file_inode(file); + struct ntfs_inode *ni = ntfs_i(dir); + struct super_block *sb = dir->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + loff_t i_size = i_size_read(dir); + u32 pos = ctx->pos; + u8 *name = NULL; + struct indx_node *node = NULL; + u8 index_bits = ni->dir.index_bits; + + /* name is a buffer of PATH_MAX length */ + static_assert(NTFS_NAME_LEN * 4 < PATH_MAX); + + eod = i_size + sbi->record_size; + + if (pos >= eod) + return 0; + + if (!dir_emit_dots(file, ctx)) + return 0; + + /* allocate PATH_MAX bytes */ + name = __getname(); + if (!name) + return -ENOMEM; + + if (!ni->mi_loaded && ni->attr_list.size) { + /* + * directory inode is locked for read + * load all subrecords to avoid 'write' access to 'ni' during + * directory reading + */ + ni_lock(ni); + if (!ni->mi_loaded && ni->attr_list.size) { + err = ni_load_all_mi(ni); + if (!err) + ni->mi_loaded = true; + } + ni_unlock(ni); + if (err) + goto out; + } + + root = indx_get_root(&ni->dir, ni, NULL, NULL); + if (!root) { + err = -EINVAL; + goto out; + } + + if (pos >= sbi->record_size) { + bit = (pos - sbi->record_size) >> index_bits; + } else { + err = ntfs_read_hdr(sbi, ni, &root->ihdr, 0, pos, name, ctx); + if (err) + goto out; + bit = 0; + } + + if (!i_size) { + ctx->pos = eod; + goto out; + } + + for (;;) { + vbo = (u64)bit << index_bits; + if (vbo >= i_size) { + ctx->pos = eod; + goto out; + } + + err = indx_used_bit(&ni->dir, ni, &bit); + if (err) + goto out; + + if (bit == MINUS_ONE_T) { + ctx->pos = eod; + goto out; + } + + vbo = (u64)bit << index_bits; + if (vbo >= i_size) { + ntfs_inode_err(dir, "Looks like your dir is corrupt"); + err = -EINVAL; + goto out; + } + + err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, + &node); + if (err) + goto out; + + err = ntfs_read_hdr(sbi, ni, &node->index->ihdr, + vbo + sbi->record_size, pos, name, ctx); + if (err) + goto out; + + bit += 1; + } + +out: + + __putname(name); + put_indx_node(node); + + if (err == -ENOENT) { + err = 0; + ctx->pos = pos; + } + + return err; +} + +static int ntfs_dir_count(struct inode *dir, bool *is_empty, size_t *dirs, + size_t *files) +{ + int err = 0; + struct ntfs_inode *ni = ntfs_i(dir); + struct NTFS_DE *e = NULL; + struct INDEX_ROOT *root; + struct INDEX_HDR *hdr; + const struct ATTR_FILE_NAME *fname; + u32 e_size, off, end; + u64 vbo = 0; + size_t drs = 0, fles = 0, bit = 0; + loff_t i_size = ni->vfs_inode.i_size; + struct indx_node *node = NULL; + u8 index_bits = ni->dir.index_bits; + + if (is_empty) + *is_empty = true; + + root = indx_get_root(&ni->dir, ni, NULL, NULL); + if (!root) + return -EINVAL; + + hdr = &root->ihdr; + + for (;;) { + end = le32_to_cpu(hdr->used); + off = le32_to_cpu(hdr->de_off); + + for (; off + sizeof(struct NTFS_DE) <= end; off += e_size) { + e = Add2Ptr(hdr, off); + e_size = le16_to_cpu(e->size); + if (e_size < sizeof(struct NTFS_DE) || + off + e_size > end) + break; + + if (de_is_last(e)) + break; + + fname = de_get_fname(e); + if (!fname) + continue; + + if (fname->type == FILE_NAME_DOS) + continue; + + if (is_empty) { + *is_empty = false; + if (!dirs && !files) + goto out; + } + + if (fname->dup.fa & FILE_ATTRIBUTE_DIRECTORY) + drs += 1; + else + fles += 1; + } + + if (vbo >= i_size) + goto out; + + err = indx_used_bit(&ni->dir, ni, &bit); + if (err) + goto out; + + if (bit == MINUS_ONE_T) + goto out; + + vbo = (u64)bit << index_bits; + if (vbo >= i_size) + goto out; + + err = indx_read(&ni->dir, ni, bit << ni->dir.idx2vbn_bits, + &node); + if (err) + goto out; + + hdr = &node->index->ihdr; + bit += 1; + vbo = (u64)bit << ni->dir.idx2vbn_bits; + } + +out: + put_indx_node(node); + if (dirs) + *dirs = drs; + if (files) + *files = fles; + + return err; +} + +bool dir_is_empty(struct inode *dir) +{ + bool is_empty = false; + + ntfs_dir_count(dir, &is_empty, NULL, NULL); + + return is_empty; +} + +const struct file_operations ntfs_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .iterate_shared = ntfs_readdir, + .fsync = generic_file_fsync, + .open = ntfs_file_open, +}; diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c new file mode 100644 index 000000000000..0dbdf237cc83 --- /dev/null +++ b/fs/ntfs3/file.c @@ -0,0 +1,1129 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * regular file handling primitives for ntfs-based filesystems + */ +#include +#include +#include +#include +#include +#include /* FAT_IOCTL_XXX */ +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg) +{ + struct fstrim_range __user *user_range; + struct fstrim_range range; + struct request_queue *q = bdev_get_queue(sbi->sb->s_bdev); + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (!blk_queue_discard(q)) + return -EOPNOTSUPP; + + user_range = (struct fstrim_range __user *)arg; + if (copy_from_user(&range, user_range, sizeof(range))) + return -EFAULT; + + range.minlen = max_t(u32, range.minlen, q->limits.discard_granularity); + + err = ntfs_trim_fs(sbi, &range); + if (err < 0) + return err; + + if (copy_to_user(user_range, &range, sizeof(range))) + return -EFAULT; + + return 0; +} + +static long ntfs_ioctl(struct file *filp, u32 cmd, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + u32 __user *user_attr = (u32 __user *)arg; + + switch (cmd) { + case FAT_IOCTL_GET_ATTRIBUTES: + return put_user(le32_to_cpu(ntfs_i(inode)->std_fa), user_attr); + + case FAT_IOCTL_GET_VOLUME_ID: + return put_user(sbi->volume.ser_num, user_attr); + + case FITRIM: + return ntfs_ioctl_fitrim(sbi, arg); + } + return -ENOTTY; /* Inappropriate ioctl for device */ +} + +#ifdef CONFIG_COMPAT +static long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) + +{ + return ntfs_ioctl(filp, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +/* + * inode_operations::getattr + */ +int ntfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, + u32 flags) +{ + struct inode *inode = d_inode(path->dentry); + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_compressed(ni)) + stat->attributes |= STATX_ATTR_COMPRESSED; + + if (is_encrypted(ni)) + stat->attributes |= STATX_ATTR_ENCRYPTED; + + stat->attributes_mask |= STATX_ATTR_COMPRESSED | STATX_ATTR_ENCRYPTED; + + generic_fillattr(inode, stat); + + stat->result_mask |= STATX_BTIME; + stat->btime = ni->i_crtime; + + return 0; +} + +static int ntfs_extend_initialized_size(struct file *file, + struct ntfs_inode *ni, + const loff_t valid, + const loff_t new_valid) +{ + struct inode *inode = &ni->vfs_inode; + struct address_space *mapping = inode->i_mapping; + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + loff_t pos = valid; + int err; + + if (is_resident(ni)) { + ni->i_valid = new_valid; + return 0; + } + + WARN_ON(is_compressed(ni)); + WARN_ON(valid >= new_valid); + + for (;;) { + u32 zerofrom, len; + struct page *page; + void *fsdata; + u8 bits; + CLST vcn, lcn, clen; + + if (is_sparsed(ni)) { + bits = sbi->cluster_bits; + vcn = pos >> bits; + + err = attr_data_get_block(ni, vcn, 0, &lcn, &clen, + NULL); + if (err) + goto out; + + if (lcn == SPARSE_LCN) { + loff_t vbo = (loff_t)vcn << bits; + loff_t to = vbo + ((loff_t)clen << bits); + + if (to <= new_valid) { + ni->i_valid = to; + pos = to; + goto next; + } + + if (vbo < pos) { + pos = vbo; + } else { + to = (new_valid >> bits) << bits; + if (pos < to) { + ni->i_valid = to; + pos = to; + goto next; + } + } + } + } + + zerofrom = pos & (PAGE_SIZE - 1); + len = PAGE_SIZE - zerofrom; + + if (pos + len > new_valid) + len = new_valid - pos; + + err = pagecache_write_begin(file, mapping, pos, len, 0, &page, + &fsdata); + if (err) + goto out; + + zero_user_segment(page, zerofrom, PAGE_SIZE); + + /* this function in any case puts page*/ + err = pagecache_write_end(file, mapping, pos, len, len, page, + fsdata); + if (err < 0) + goto out; + pos += len; + +next: + if (pos >= new_valid) + break; + + balance_dirty_pages_ratelimited(mapping); + cond_resched(); + } + + mark_inode_dirty(inode); + + return 0; + +out: + ni->i_valid = valid; + ntfs_inode_warn(inode, "failed to extend initialized size to %llx.", + new_valid); + return err; +} + +/* + * ntfs_sparse_cluster + * + * Helper function to zero a new allocated clusters + */ +void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, + CLST len) +{ + struct address_space *mapping = inode->i_mapping; + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + u64 vbo = (u64)vcn << sbi->cluster_bits; + u64 bytes = (u64)len << sbi->cluster_bits; + u32 blocksize = 1 << inode->i_blkbits; + pgoff_t idx0 = page0 ? page0->index : -1; + loff_t vbo_clst = vbo & sbi->cluster_mask_inv; + loff_t end = ntfs_up_cluster(sbi, vbo + bytes); + pgoff_t idx = vbo_clst >> PAGE_SHIFT; + u32 from = vbo_clst & (PAGE_SIZE - 1); + pgoff_t idx_end = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; + loff_t page_off; + u32 to; + bool partial; + struct page *page; + + for (; idx < idx_end; idx += 1, from = 0) { + page = idx == idx0 ? page0 : grab_cache_page(mapping, idx); + + if (!page) + continue; + + page_off = (loff_t)idx << PAGE_SHIFT; + to = (page_off + PAGE_SIZE) > end ? (end - page_off) + : PAGE_SIZE; + partial = false; + + if ((from || PAGE_SIZE != to) && + likely(!page_has_buffers(page))) { + create_empty_buffers(page, blocksize, 0); + if (!page_has_buffers(page)) { + ntfs_inode_err( + inode, + "failed to allocate page buffers."); + /*err = -ENOMEM;*/ + goto unlock_page; + } + } + + if (page_has_buffers(page)) { + struct buffer_head *head, *bh; + u32 bh_off = 0; + + bh = head = page_buffers(page); + do { + u32 bh_next = bh_off + blocksize; + + if (from <= bh_off && bh_next <= to) { + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + } else if (!buffer_uptodate(bh)) { + partial = true; + } + bh_off = bh_next; + } while (head != (bh = bh->b_this_page)); + } + + zero_user_segment(page, from, to); + + if (!partial) { + if (!PageUptodate(page)) + SetPageUptodate(page); + set_page_dirty(page); + } + +unlock_page: + if (idx != idx0) { + unlock_page(page); + put_page(page); + } + cond_resched(); + } + mark_inode_dirty(inode); +} + +/* + * file_operations::mmap + */ +static int ntfs_file_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + u64 to, from = ((u64)vma->vm_pgoff << PAGE_SHIFT); + bool rw = vma->vm_flags & VM_WRITE; + int err; + + if (is_encrypted(ni)) { + ntfs_inode_warn(inode, + "mmap is not supported for encrypted files"); + err = -EOPNOTSUPP; + goto out; + } + + if (!rw) + goto do_map; + + if (is_compressed(ni)) { + ntfs_inode_warn( + inode, + "mmap(write) is not supported for compressed files"); + err = -EOPNOTSUPP; + goto out; + } + + to = min_t(loff_t, i_size_read(inode), + from + vma->vm_end - vma->vm_start); + + if (is_sparsed(ni)) { + /* allocate clusters for rw map */ + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + CLST vcn, lcn, len; + CLST end = bytes_to_cluster(sbi, to); + bool new; + + for (vcn = from >> sbi->cluster_bits; vcn < end; vcn += len) { + err = attr_data_get_block(ni, vcn, 1, &lcn, &len, &new); + if (err) + goto out; + if (!new) + continue; + ntfs_sparse_cluster(inode, NULL, vcn, 1); + } + } + + if (ni->i_valid < to) { + inode_lock(inode); + err = ntfs_extend_initialized_size(file, ni, ni->i_valid, to); + inode_unlock(inode); + if (err) + goto out; + } + +do_map: + err = generic_file_mmap(file, vma); +out: + return err; +} + +static int ntfs_extend(struct inode *inode, loff_t pos, size_t count, + struct file *file) +{ + struct ntfs_inode *ni = ntfs_i(inode); + struct address_space *mapping = inode->i_mapping; + loff_t end = pos + count; + bool extend_init = file && pos > ni->i_valid; + int err; + + if (end <= inode->i_size && !extend_init) + return 0; + + /*mark rw ntfs as dirty. it will be cleared at umount*/ + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); + + if (end > inode->i_size) { + err = ntfs_set_size(inode, end); + if (err) + goto out; + inode->i_size = end; + } + + if (extend_init && !is_compressed(ni)) { + err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos); + if (err) + goto out; + } else { + err = 0; + } + + inode->i_ctime = inode->i_mtime = current_time(inode); + mark_inode_dirty(inode); + + if (IS_SYNC(inode)) { + int err2; + + err = filemap_fdatawrite_range(mapping, pos, end - 1); + err2 = sync_mapping_buffers(mapping); + if (!err) + err = err2; + err2 = write_inode_now(inode, 1); + if (!err) + err = err2; + if (!err) + err = filemap_fdatawait_range(mapping, pos, end - 1); + } + +out: + return err; +} + +static int ntfs_truncate(struct inode *inode, loff_t new_size) +{ + struct super_block *sb = inode->i_sb; + struct ntfs_inode *ni = ntfs_i(inode); + int err, dirty = 0; + u64 new_valid; + + if (!S_ISREG(inode->i_mode)) + return 0; + + if (is_compressed(ni)) { + if (ni->i_valid > new_size) + ni->i_valid = new_size; + } else { + err = block_truncate_page(inode->i_mapping, new_size, + ntfs_get_block); + if (err) + return err; + } + + new_valid = ntfs_up_block(sb, min_t(u64, ni->i_valid, new_size)); + + ni_lock(ni); + + truncate_setsize(inode, new_size); + + down_write(&ni->file.run_lock); + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &new_valid, true, NULL); + up_write(&ni->file.run_lock); + + if (new_valid < ni->i_valid) + ni->i_valid = new_valid; + + ni_unlock(ni); + + ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; + inode->i_ctime = inode->i_mtime = current_time(inode); + if (!IS_DIRSYNC(inode)) { + dirty = 1; + } else { + err = ntfs_sync_inode(inode); + if (err) + return err; + } + + if (dirty) + mark_inode_dirty(inode); + + /*ntfs_flush_inodes(inode->i_sb, inode, NULL);*/ + + return 0; +} + +/* + * Preallocate space for a file. This implements ntfs's fallocate file + * operation, which gets called from sys_fallocate system call. User + * space requests 'len' bytes at 'vbo'. If FALLOC_FL_KEEP_SIZE is set + * we just allocate clusters without zeroing them out. Otherwise we + * allocate and zero out clusters via an expanding truncate. + */ +static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len) +{ + struct inode *inode = file->f_mapping->host; + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(inode); + loff_t end = vbo + len; + loff_t vbo_down = round_down(vbo, PAGE_SIZE); + loff_t i_size; + int err; + + /* No support for dir */ + if (!S_ISREG(inode->i_mode)) + return -EOPNOTSUPP; + + /* Return error if mode is not supported */ + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE)) + return -EOPNOTSUPP; + + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + + inode_lock(inode); + i_size = inode->i_size; + + if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { + /* should never be here, see ntfs_file_open*/ + err = -EOPNOTSUPP; + goto out; + } + + if (mode & FALLOC_FL_PUNCH_HOLE) { + if (!(mode & FALLOC_FL_KEEP_SIZE)) { + err = -EINVAL; + goto out; + } + + if (!is_sparsed(ni) && !is_compressed(ni)) { + ntfs_inode_warn( + inode, + "punch_hole only for sparsed/compressed files"); + err = -EOPNOTSUPP; + goto out; + } + + err = filemap_write_and_wait_range(inode->i_mapping, vbo, + end - 1); + if (err) + goto out; + + err = filemap_write_and_wait_range(inode->i_mapping, end, + LLONG_MAX); + if (err) + goto out; + + truncate_pagecache(inode, vbo_down); + + ni_lock(ni); + err = attr_punch_hole(ni, vbo, len); + ni_unlock(ni); + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + if (mode & ~FALLOC_FL_COLLAPSE_RANGE) { + err = -EINVAL; + goto out; + } + + /* + * Write tail of the last page before removed range since + * it will get removed from the page cache below. + */ + err = filemap_write_and_wait_range(inode->i_mapping, vbo_down, + vbo); + if (err) + goto out; + + /* + * Write data that will be shifted to preserve them + * when discarding page cache below + */ + err = filemap_write_and_wait_range(inode->i_mapping, end, + LLONG_MAX); + if (err) + goto out; + + truncate_pagecache(inode, vbo_down); + + ni_lock(ni); + err = attr_collapse_range(ni, vbo, len); + ni_unlock(ni); + } else { + /* + * normal file: allocate clusters, do not change 'valid' size + */ + err = ntfs_set_size(inode, max(end, i_size)); + if (err) + goto out; + + if (is_sparsed(ni) || is_compressed(ni)) { + CLST vcn_v = ni->i_valid >> sbi->cluster_bits; + CLST vcn = vbo >> sbi->cluster_bits; + CLST cend = bytes_to_cluster(sbi, end); + CLST lcn, clen; + bool new; + + /* + * allocate but not zero new clusters (see below comments) + * this breaks security (one can read unused on-disk areas) + * zeroing these clusters may be too long + * may be we should check here for root rights? + */ + for (; vcn < cend; vcn += clen) { + err = attr_data_get_block(ni, vcn, cend - vcn, + &lcn, &clen, &new); + if (err) + goto out; + if (!new || vcn >= vcn_v) + continue; + + /* + * Unwritten area + * NTFS is not able to store several unwritten areas + * Activate 'ntfs_sparse_cluster' to zero new allocated clusters + * + * Dangerous in case: + * 1G of sparsed clusters + 1 cluster of data => + * valid_size == 1G + 1 cluster + * fallocate(1G) will zero 1G and this can be very long + * xfstest 016/086 will fail without 'ntfs_sparse_cluster' + */ + /*ntfs_sparse_cluster(inode, NULL, vcn, + * min(vcn_v - vcn, clen)); + */ + } + } + + if (mode & FALLOC_FL_KEEP_SIZE) { + ni_lock(ni); + /*true - keep preallocated*/ + err = attr_set_size(ni, ATTR_DATA, NULL, 0, + &ni->file.run, i_size, &ni->i_valid, + true, NULL); + ni_unlock(ni); + } + } + + if (!err) { + inode->i_ctime = inode->i_mtime = current_time(inode); + mark_inode_dirty(inode); + } +out: + if (err == -EFBIG) + err = -ENOSPC; + + inode_unlock(inode); + return err; +} + +/* + * inode_operations::setattr + */ +int ntfs3_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct super_block *sb = dentry->d_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct inode *inode = d_inode(dentry); + struct ntfs_inode *ni = ntfs_i(inode); + u32 ia_valid = attr->ia_valid; + umode_t mode = inode->i_mode; + int err; + + if (sbi->options.no_acs_rules) { + /* "no access rules" - force any changes of time etc. */ + attr->ia_valid |= ATTR_FORCE; + /* and disable for editing some attributes */ + attr->ia_valid &= ~(ATTR_UID | ATTR_GID | ATTR_MODE); + ia_valid = attr->ia_valid; + } + + err = setattr_prepare(dentry, attr); + if (err) + goto out; + + if (ia_valid & ATTR_SIZE) { + loff_t oldsize = inode->i_size; + + if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { + /* should never be here, see ntfs_file_open*/ + err = -EOPNOTSUPP; + goto out; + } + inode_dio_wait(inode); + + if (attr->ia_size < oldsize) + err = ntfs_truncate(inode, attr->ia_size); + else if (attr->ia_size > oldsize) + err = ntfs_extend(inode, attr->ia_size, 0, NULL); + + if (err) + goto out; + + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + } + + setattr_copy(inode, attr); + + if (mode != inode->i_mode) { + err = ntfs_acl_chmod(inode); + if (err) + goto out; + + /* linux 'w' -> windows 'ro' */ + if (0222 & inode->i_mode) + ni->std_fa &= ~FILE_ATTRIBUTE_READONLY; + else + ni->std_fa |= FILE_ATTRIBUTE_READONLY; + } + + mark_inode_dirty(inode); +out: + return err; +} + +static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) +{ + ssize_t err; + size_t count = iov_iter_count(iter); + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_encrypted(ni)) { + ntfs_inode_warn(inode, "encrypted i/o not supported"); + return -EOPNOTSUPP; + } + + if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) { + ntfs_inode_warn(inode, "direct i/o + compressed not supported"); + return -EOPNOTSUPP; + } + +#ifndef CONFIG_NTFS3_LZX_XPRESS + if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) { + ntfs_inode_warn( + inode, + "activate CONFIG_NTFS3_LZX_XPRESS to read external compressed files"); + return -EOPNOTSUPP; + } +#endif + + if (is_dedup(ni)) { + ntfs_inode_warn(inode, "read deduplicated not supported"); + return -EOPNOTSUPP; + } + + err = count ? generic_file_read_iter(iocb, iter) : 0; + + return err; +} + +/* returns array of locked pages */ +static int ntfs_get_frame_pages(struct address_space *mapping, pgoff_t index, + struct page **pages, u32 pages_per_frame, + bool *frame_uptodate) +{ + gfp_t gfp_mask = mapping_gfp_mask(mapping); + u32 npages; + + *frame_uptodate = true; + + for (npages = 0; npages < pages_per_frame; npages++, index++) { + struct page *page; + + page = find_or_create_page(mapping, index, gfp_mask); + if (!page) { + while (npages--) { + page = pages[npages]; + unlock_page(page); + put_page(page); + } + + return -ENOMEM; + } + + if (!PageUptodate(page)) + *frame_uptodate = false; + + pages[npages] = page; + } + + return 0; +} + +/*helper for ntfs_file_write_iter (compressed files)*/ +static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from) +{ + int err; + struct file *file = iocb->ki_filp; + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; + struct inode *inode = file_inode(file); + loff_t i_size = inode->i_size; + struct address_space *mapping = inode->i_mapping; + struct ntfs_inode *ni = ntfs_i(inode); + u64 valid = ni->i_valid; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct page *page, **pages = NULL; + size_t written = 0; + u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; + u32 frame_size = 1u << frame_bits; + u32 pages_per_frame = frame_size >> PAGE_SHIFT; + u32 ip, off; + CLST frame; + u64 frame_vbo; + pgoff_t index; + bool frame_uptodate; + + if (frame_size < PAGE_SIZE) { + /* + * frame_size == 8K if cluster 512 + * frame_size == 64K if cluster 4096 + */ + ntfs_inode_warn(inode, "page size is bigger than frame size"); + return -EOPNOTSUPP; + } + + pages = ntfs_malloc(pages_per_frame * sizeof(struct page *)); + if (!pages) + return -ENOMEM; + + current->backing_dev_info = inode_to_bdi(inode); + err = file_remove_privs(file); + if (err) + goto out; + + err = file_update_time(file); + if (err) + goto out; + + /* zero range [valid : pos) */ + while (valid < pos) { + CLST lcn, clen; + + frame = valid >> frame_bits; + frame_vbo = valid & ~(frame_size - 1); + off = valid & (frame_size - 1); + + err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 0, &lcn, + &clen, NULL); + if (err) + goto out; + + if (lcn == SPARSE_LCN) { + ni->i_valid = valid = + frame_vbo + ((u64)clen << sbi->cluster_bits); + continue; + } + + /* Load full frame */ + err = ntfs_get_frame_pages(mapping, frame_vbo >> PAGE_SHIFT, + pages, pages_per_frame, + &frame_uptodate); + if (err) + goto out; + + if (!frame_uptodate && off) { + err = ni_read_frame(ni, frame_vbo, pages, + pages_per_frame); + if (err) { + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; + unlock_page(page); + put_page(page); + } + goto out; + } + } + + ip = off >> PAGE_SHIFT; + off = offset_in_page(valid); + for (; ip < pages_per_frame; ip++, off = 0) { + page = pages[ip]; + zero_user_segment(page, off, PAGE_SIZE); + flush_dcache_page(page); + SetPageUptodate(page); + } + + ni_lock(ni); + err = ni_write_frame(ni, pages, pages_per_frame); + ni_unlock(ni); + + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; + SetPageUptodate(page); + unlock_page(page); + put_page(page); + } + + if (err) + goto out; + + ni->i_valid = valid = frame_vbo + frame_size; + } + + /* copy user data [pos : pos + count) */ + while (count) { + size_t copied, bytes; + + off = pos & (frame_size - 1); + bytes = frame_size - off; + if (bytes > count) + bytes = count; + + frame = pos >> frame_bits; + frame_vbo = pos & ~(frame_size - 1); + index = frame_vbo >> PAGE_SHIFT; + + if (unlikely(iov_iter_fault_in_readable(from, bytes))) { + err = -EFAULT; + goto out; + } + + /* Load full frame */ + err = ntfs_get_frame_pages(mapping, index, pages, + pages_per_frame, &frame_uptodate); + if (err) + goto out; + + if (!frame_uptodate) { + loff_t to = pos + bytes; + + if (off || (to < i_size && (to & (frame_size - 1)))) { + err = ni_read_frame(ni, frame_vbo, pages, + pages_per_frame); + if (err) { + for (ip = 0; ip < pages_per_frame; + ip++) { + page = pages[ip]; + unlock_page(page); + put_page(page); + } + goto out; + } + } + } + + WARN_ON(!bytes); + copied = 0; + ip = off >> PAGE_SHIFT; + off = offset_in_page(pos); + + /* copy user data to pages */ + for (;;) { + size_t cp, tail = PAGE_SIZE - off; + + page = pages[ip]; + cp = iov_iter_copy_from_user_atomic(page, from, off, + min(tail, bytes)); + flush_dcache_page(page); + iov_iter_advance(from, cp); + copied += cp; + bytes -= cp; + if (!bytes || !cp) + break; + + if (cp < tail) { + off += cp; + } else { + ip++; + off = 0; + } + } + + ni_lock(ni); + err = ni_write_frame(ni, pages, pages_per_frame); + ni_unlock(ni); + + for (ip = 0; ip < pages_per_frame; ip++) { + page = pages[ip]; + ClearPageDirty(page); + SetPageUptodate(page); + unlock_page(page); + put_page(page); + } + + if (err) + goto out; + + /* + * We can loop for a long time in here. Be nice and allow + * us to schedule out to avoid softlocking if preempt + * is disabled. + */ + cond_resched(); + + pos += copied; + written += copied; + + count = iov_iter_count(from); + } + +out: + ntfs_free(pages); + + current->backing_dev_info = NULL; + + if (err < 0) + return err; + + iocb->ki_pos += written; + if (iocb->ki_pos > ni->i_valid) + ni->i_valid = iocb->ki_pos; + + return written; +} + +/* + * file_operations::write_iter + */ +static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_encrypted(ni)) { + ntfs_inode_warn(inode, "encrypted i/o not supported"); + return -EOPNOTSUPP; + } + + if (is_compressed(ni) && (iocb->ki_flags & IOCB_DIRECT)) { + ntfs_inode_warn(inode, "direct i/o + compressed not supported"); + return -EOPNOTSUPP; + } + + if (is_dedup(ni)) { + ntfs_inode_warn(inode, "write into deduplicated not supported"); + return -EOPNOTSUPP; + } + + if (!inode_trylock(inode)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_lock(inode); + } + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + + if (WARN_ON(ni->ni_flags & NI_FLAG_COMPRESSED_MASK)) { + /* should never be here, see ntfs_file_open*/ + ret = -EOPNOTSUPP; + goto out; + } + + ret = ntfs_extend(inode, iocb->ki_pos, ret, file); + if (ret) + goto out; + + ret = is_compressed(ni) ? ntfs_compress_write(iocb, from) + : __generic_file_write_iter(iocb, from); + +out: + inode_unlock(inode); + + if (ret > 0) + ret = generic_write_sync(iocb, ret); + + return ret; +} + +/* + * file_operations::open + */ +int ntfs_file_open(struct inode *inode, struct file *file) +{ + struct ntfs_inode *ni = ntfs_i(inode); + + if (unlikely((is_compressed(ni) || is_encrypted(ni)) && + (file->f_flags & O_DIRECT))) { + return -EOPNOTSUPP; + } + + /* Decompress "external compressed" file if opened for rw */ + if ((ni->ni_flags & NI_FLAG_COMPRESSED_MASK) && + (file->f_flags & (O_WRONLY | O_RDWR | O_TRUNC))) { +#ifdef CONFIG_NTFS3_LZX_XPRESS + int err = ni_decompress_file(ni); + + if (err) + return err; +#else + ntfs_inode_warn( + inode, + "activate CONFIG_NTFS3_LZX_XPRESS to write external compressed files"); + return -EOPNOTSUPP; +#endif + } + + return generic_file_open(inode, file); +} + +/* + * file_operations::release + */ +static int ntfs_file_release(struct inode *inode, struct file *file) +{ + struct ntfs_inode *ni = ntfs_i(inode); + struct ntfs_sb_info *sbi = ni->mi.sbi; + int err = 0; + + /* if we are the last writer on the inode, drop the block reservation */ + if (sbi->options.prealloc && ((file->f_mode & FMODE_WRITE) && + atomic_read(&inode->i_writecount) == 1)) { + ni_lock(ni); + down_write(&ni->file.run_lock); + + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, + inode->i_size, &ni->i_valid, false, NULL); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + } + return err; +} + +/* file_operations::fiemap */ +int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len) +{ + int err; + struct ntfs_inode *ni = ntfs_i(inode); + + if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) + return -EOPNOTSUPP; + + ni_lock(ni); + + err = ni_fiemap(ni, fieinfo, start, len); + + ni_unlock(ni); + + return err; +} + +const struct inode_operations ntfs_file_inode_operations = { + .getattr = ntfs_getattr, + .setattr = ntfs3_setattr, + .listxattr = ntfs_listxattr, + .permission = ntfs_permission, + .get_acl = ntfs_get_acl, + .set_acl = ntfs_set_acl, + .fiemap = ntfs_fiemap, +}; + +const struct file_operations ntfs_file_operations = { + .llseek = generic_file_llseek, + .read_iter = ntfs_file_read_iter, + .write_iter = ntfs_file_write_iter, + .unlocked_ioctl = ntfs_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ntfs_compat_ioctl, +#endif + .splice_read = generic_file_splice_read, + .mmap = ntfs_file_mmap, + .open = ntfs_file_open, + .fsync = generic_file_fsync, + .splice_write = iter_file_splice_write, + .fallocate = ntfs_fallocate, + .release = ntfs_file_release, +}; diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c new file mode 100644 index 000000000000..27ccb37064bb --- /dev/null +++ b/fs/ntfs3/frecord.c @@ -0,0 +1,3071 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" +#ifdef CONFIG_NTFS3_LZX_XPRESS +#include "lib/lib.h" +#endif + +static struct mft_inode *ni_ins_mi(struct ntfs_inode *ni, struct rb_root *tree, + CLST ino, struct rb_node *ins) +{ + struct rb_node **p = &tree->rb_node; + struct rb_node *pr = NULL; + + while (*p) { + struct mft_inode *mi; + + pr = *p; + mi = rb_entry(pr, struct mft_inode, node); + if (mi->rno > ino) + p = &pr->rb_left; + else if (mi->rno < ino) + p = &pr->rb_right; + else + return mi; + } + + if (!ins) + return NULL; + + rb_link_node(ins, pr, p); + rb_insert_color(ins, tree); + return rb_entry(ins, struct mft_inode, node); +} + +/* + * ni_find_mi + * + * finds mft_inode by record number + */ +static struct mft_inode *ni_find_mi(struct ntfs_inode *ni, CLST rno) +{ + return ni_ins_mi(ni, &ni->mi_tree, rno, NULL); +} + +/* + * ni_add_mi + * + * adds new mft_inode into ntfs_inode + */ +static void ni_add_mi(struct ntfs_inode *ni, struct mft_inode *mi) +{ + ni_ins_mi(ni, &ni->mi_tree, mi->rno, &mi->node); +} + +/* + * ni_remove_mi + * + * removes mft_inode from ntfs_inode + */ +void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi) +{ + rb_erase(&mi->node, &ni->mi_tree); +} + +/* + * ni_std + * + * returns pointer into std_info from primary record + */ +struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni) +{ + const struct ATTRIB *attr; + + attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO)) + : NULL; +} + +/* + * ni_std5 + * + * returns pointer into std_info from primary record + */ +struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni) +{ + const struct ATTRIB *attr; + + attr = mi_find_attr(&ni->mi, NULL, ATTR_STD, NULL, 0, NULL); + + return attr ? resident_data_ex(attr, sizeof(struct ATTR_STD_INFO5)) + : NULL; +} + +/* + * ni_clear + * + * clears resources allocated by ntfs_inode + */ +void ni_clear(struct ntfs_inode *ni) +{ + struct rb_node *node; + + if (!ni->vfs_inode.i_nlink && is_rec_inuse(ni->mi.mrec)) + ni_delete_all(ni); + + al_destroy(ni); + + for (node = rb_first(&ni->mi_tree); node;) { + struct rb_node *next = rb_next(node); + struct mft_inode *mi = rb_entry(node, struct mft_inode, node); + + rb_erase(node, &ni->mi_tree); + mi_put(mi); + node = next; + } + + /* bad inode always has mode == S_IFREG */ + if (ni->ni_flags & NI_FLAG_DIR) + indx_clear(&ni->dir); + else { + run_close(&ni->file.run); +#ifdef CONFIG_NTFS3_LZX_XPRESS + if (ni->file.offs_page) { + /* on-demand allocated page for offsets */ + put_page(ni->file.offs_page); + ni->file.offs_page = NULL; + } +#endif + } + + mi_clear(&ni->mi); +} + +/* + * ni_load_mi_ex + * + * finds mft_inode by record number. + */ +int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi) +{ + int err; + struct mft_inode *r; + + r = ni_find_mi(ni, rno); + if (r) + goto out; + + err = mi_get(ni->mi.sbi, rno, &r); + if (err) + return err; + + ni_add_mi(ni, r); + +out: + if (mi) + *mi = r; + return 0; +} + +/* + * ni_load_mi + * + * load mft_inode corresponded list_entry + */ +int ni_load_mi(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, + struct mft_inode **mi) +{ + CLST rno; + + if (!le) { + *mi = &ni->mi; + return 0; + } + + rno = ino_get(&le->ref); + if (rno == ni->mi.rno) { + *mi = &ni->mi; + return 0; + } + return ni_load_mi_ex(ni, rno, mi); +} + +/* + * ni_find_attr + * + * returns attribute and record this attribute belongs to + */ +struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY **le_o, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, const CLST *vcn, + struct mft_inode **mi) +{ + struct ATTR_LIST_ENTRY *le; + struct mft_inode *m; + + if (!ni->attr_list.size || + (!name_len && (type == ATTR_LIST || type == ATTR_STD))) { + if (le_o) + *le_o = NULL; + if (mi) + *mi = &ni->mi; + + /* Look for required attribute in primary record */ + return mi_find_attr(&ni->mi, attr, type, name, name_len, NULL); + } + + /* first look for list entry of required type */ + le = al_find_ex(ni, le_o ? *le_o : NULL, type, name, name_len, vcn); + if (!le) + return NULL; + + if (le_o) + *le_o = le; + + /* Load record that contains this attribute */ + if (ni_load_mi(ni, le, &m)) + return NULL; + + /* Look for required attribute */ + attr = mi_find_attr(m, NULL, type, name, name_len, &le->id); + + if (!attr) + goto out; + + if (!attr->non_res) { + if (vcn && *vcn) + goto out; + } else if (!vcn) { + if (attr->nres.svcn) + goto out; + } else if (le64_to_cpu(attr->nres.svcn) > *vcn || + *vcn > le64_to_cpu(attr->nres.evcn)) { + goto out; + } + + if (mi) + *mi = m; + return attr; + +out: + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_ERROR); + return NULL; +} + +/* + * ni_enum_attr_ex + * + * enumerates attributes in ntfs_inode + */ +struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY **le, + struct mft_inode **mi) +{ + struct mft_inode *mi2; + struct ATTR_LIST_ENTRY *le2; + + /* Do we have an attribute list? */ + if (!ni->attr_list.size) { + *le = NULL; + if (mi) + *mi = &ni->mi; + /* Enum attributes in primary record */ + return mi_enum_attr(&ni->mi, attr); + } + + /* get next list entry */ + le2 = *le = al_enumerate(ni, attr ? *le : NULL); + if (!le2) + return NULL; + + /* Load record that contains the required attribute */ + if (ni_load_mi(ni, le2, &mi2)) + return NULL; + + if (mi) + *mi = mi2; + + /* Find attribute in loaded record */ + return rec_find_attr_le(mi2, le2); +} + +/* + * ni_load_attr + * + * loads attribute that contains given vcn + */ +struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, CLST vcn, + struct mft_inode **pmi) +{ + struct ATTR_LIST_ENTRY *le; + struct ATTRIB *attr; + struct mft_inode *mi; + struct ATTR_LIST_ENTRY *next; + + if (!ni->attr_list.size) { + if (pmi) + *pmi = &ni->mi; + return mi_find_attr(&ni->mi, NULL, type, name, name_len, NULL); + } + + le = al_find_ex(ni, NULL, type, name, name_len, NULL); + if (!le) + return NULL; + + /* + * Unfortunately ATTR_LIST_ENTRY contains only start vcn + * So to find the ATTRIB segment that contains 'vcn' we should + * enumerate some entries + */ + if (vcn) { + for (;; le = next) { + next = al_find_ex(ni, le, type, name, name_len, NULL); + if (!next || le64_to_cpu(next->vcn) > vcn) + break; + } + } + + if (ni_load_mi(ni, le, &mi)) + return NULL; + + if (pmi) + *pmi = mi; + + attr = mi_find_attr(mi, NULL, type, name, name_len, &le->id); + if (!attr) + return NULL; + + if (!attr->non_res) + return attr; + + if (le64_to_cpu(attr->nres.svcn) <= vcn && + vcn <= le64_to_cpu(attr->nres.evcn)) + return attr; + + return NULL; +} + +/* + * ni_load_all_mi + * + * loads all subrecords + */ +int ni_load_all_mi(struct ntfs_inode *ni) +{ + int err; + struct ATTR_LIST_ENTRY *le; + + if (!ni->attr_list.size) + return 0; + + le = NULL; + + while ((le = al_enumerate(ni, le))) { + CLST rno = ino_get(&le->ref); + + if (rno == ni->mi.rno) + continue; + + err = ni_load_mi_ex(ni, rno, NULL); + if (err) + return err; + } + + return 0; +} + +/* + * ni_add_subrecord + * + * allocate + format + attach a new subrecord + */ +bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi) +{ + struct mft_inode *m; + + m = ntfs_zalloc(sizeof(struct mft_inode)); + if (!m) + return false; + + if (mi_format_new(m, ni->mi.sbi, rno, 0, ni->mi.rno == MFT_REC_MFT)) { + mi_put(m); + return false; + } + + mi_get_ref(&ni->mi, &m->mrec->parent_ref); + + ni_add_mi(ni, m); + *mi = m; + return true; +} + +/* + * ni_remove_attr + * + * removes all attributes for the given type/name/id + */ +int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, size_t name_len, bool base_only, + const __le16 *id) +{ + int err; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + struct mft_inode *mi; + u32 type_in; + int diff; + + if (base_only || type == ATTR_LIST || !ni->attr_list.size) { + attr = mi_find_attr(&ni->mi, NULL, type, name, name_len, id); + if (!attr) + return -ENOENT; + + mi_remove_attr(&ni->mi, attr); + return 0; + } + + type_in = le32_to_cpu(type); + le = NULL; + + for (;;) { + le = al_enumerate(ni, le); + if (!le) + return 0; + +next_le2: + diff = le32_to_cpu(le->type) - type_in; + if (diff < 0) + continue; + + if (diff > 0) + return 0; + + if (le->name_len != name_len) + continue; + + if (name_len && + memcmp(le_name(le), name, name_len * sizeof(short))) + continue; + + if (id && le->id != *id) + continue; + err = ni_load_mi(ni, le, &mi); + if (err) + return err; + + al_remove_le(ni, le); + + attr = mi_find_attr(mi, NULL, type, name, name_len, id); + if (!attr) + return -ENOENT; + + mi_remove_attr(mi, attr); + + if (PtrOffset(ni->attr_list.le, le) >= ni->attr_list.size) + return 0; + goto next_le2; + } +} + +/* + * ni_ins_new_attr + * + * inserts the attribute into record + * Returns not full constructed attribute or NULL if not possible to create + */ +static struct ATTRIB *ni_ins_new_attr(struct ntfs_inode *ni, + struct mft_inode *mi, + struct ATTR_LIST_ENTRY *le, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, u32 asize, u16 name_off, + CLST svcn) +{ + int err; + struct ATTRIB *attr; + bool le_added = false; + struct MFT_REF ref; + + mi_get_ref(mi, &ref); + + if (type != ATTR_LIST && !le && ni->attr_list.size) { + err = al_add_le(ni, type, name, name_len, svcn, cpu_to_le16(-1), + &ref, &le); + if (err) { + /* no memory or no space */ + return NULL; + } + le_added = true; + + /* + * al_add_le -> attr_set_size (list) -> ni_expand_list + * which moves some attributes out of primary record + * this means that name may point into moved memory + * reinit 'name' from le + */ + name = le->name; + } + + attr = mi_insert_attr(mi, type, name, name_len, asize, name_off); + if (!attr) { + if (le_added) + al_remove_le(ni, le); + return NULL; + } + + if (type == ATTR_LIST) { + /*attr list is not in list entry array*/ + goto out; + } + + if (!le) + goto out; + + /* Update ATTRIB Id and record reference */ + le->id = attr->id; + ni->attr_list.dirty = true; + le->ref = ref; + +out: + return attr; +} + +/* + * random write access to sparsed or compressed file may result to + * not optimized packed runs. + * Here it is the place to optimize it + */ +static int ni_repack(struct ntfs_inode *ni) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct mft_inode *mi, *mi_p = NULL; + struct ATTRIB *attr = NULL, *attr_p; + struct ATTR_LIST_ENTRY *le = NULL, *le_p; + CLST alloc = 0; + u8 cluster_bits = sbi->cluster_bits; + CLST svcn, evcn = 0, svcn_p, evcn_p, next_svcn; + u32 roff, rs = sbi->record_size; + struct runs_tree run; + + run_init(&run); + + while ((attr = ni_enum_attr_ex(ni, attr, &le, &mi))) { + if (!attr->non_res) + continue; + + svcn = le64_to_cpu(attr->nres.svcn); + if (svcn != le64_to_cpu(le->vcn)) { + err = -EINVAL; + break; + } + + if (!svcn) { + alloc = le64_to_cpu(attr->nres.alloc_size) >> + cluster_bits; + mi_p = NULL; + } else if (svcn != evcn + 1) { + err = -EINVAL; + break; + } + + evcn = le64_to_cpu(attr->nres.evcn); + + if (svcn > evcn + 1) { + err = -EINVAL; + break; + } + + if (!mi_p) { + /* do not try if too little free space */ + if (le32_to_cpu(mi->mrec->used) + 8 >= rs) + continue; + + /* do not try if last attribute segment */ + if (evcn + 1 == alloc) + continue; + run_close(&run); + } + + roff = le16_to_cpu(attr->nres.run_off); + err = run_unpack(&run, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), + le32_to_cpu(attr->size) - roff); + if (err < 0) + break; + + if (!mi_p) { + mi_p = mi; + attr_p = attr; + svcn_p = svcn; + evcn_p = evcn; + le_p = le; + err = 0; + continue; + } + + /* + * run contains data from two records: mi_p and mi + * try to pack in one + */ + err = mi_pack_runs(mi_p, attr_p, &run, evcn + 1 - svcn_p); + if (err) + break; + + next_svcn = le64_to_cpu(attr_p->nres.evcn) + 1; + + if (next_svcn >= evcn + 1) { + /* we can remove this attribute segment */ + al_remove_le(ni, le); + mi_remove_attr(mi, attr); + le = le_p; + continue; + } + + attr->nres.svcn = le->vcn = cpu_to_le64(next_svcn); + mi->dirty = true; + ni->attr_list.dirty = true; + + if (evcn + 1 == alloc) { + err = mi_pack_runs(mi, attr, &run, + evcn + 1 - next_svcn); + if (err) + break; + mi_p = NULL; + } else { + mi_p = mi; + attr_p = attr; + svcn_p = next_svcn; + evcn_p = evcn; + le_p = le; + run_truncate_head(&run, next_svcn); + } + } + + if (err) { + ntfs_inode_warn(&ni->vfs_inode, "there is a problem"); + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + + /* Pack loaded but not packed runs */ + if (mi_p) + mi_pack_runs(mi_p, attr_p, &run, evcn_p + 1 - svcn_p); + } + + run_close(&run); + return err; +} + +/* + * ni_try_remove_attr_list + * + * Can we remove attribute list? + * Check the case when primary record contains enough space for all attributes + */ +static int ni_try_remove_attr_list(struct ntfs_inode *ni) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *attr, *attr_list, *attr_ins; + struct ATTR_LIST_ENTRY *le; + struct mft_inode *mi; + u32 asize, free; + struct MFT_REF ref; + __le16 id; + + if (!ni->attr_list.dirty) + return 0; + + err = ni_repack(ni); + if (err) + return err; + + attr_list = mi_find_attr(&ni->mi, NULL, ATTR_LIST, NULL, 0, NULL); + if (!attr_list) + return 0; + + asize = le32_to_cpu(attr_list->size); + + /* free space in primary record without attribute list */ + free = sbi->record_size - le32_to_cpu(ni->mi.mrec->used) + asize; + mi_get_ref(&ni->mi, &ref); + + le = NULL; + while ((le = al_enumerate(ni, le))) { + if (!memcmp(&le->ref, &ref, sizeof(ref))) + continue; + + if (le->vcn) + return 0; + + mi = ni_find_mi(ni, ino_get(&le->ref)); + if (!mi) + return 0; + + attr = mi_find_attr(mi, NULL, le->type, le_name(le), + le->name_len, &le->id); + if (!attr) + return 0; + + asize = le32_to_cpu(attr->size); + if (asize > free) + return 0; + + free -= asize; + } + + /* Is seems that attribute list can be removed from primary record */ + mi_remove_attr(&ni->mi, attr_list); + + /* + * Repeat the cycle above and move all attributes to primary record. + * It should be success! + */ + le = NULL; + while ((le = al_enumerate(ni, le))) { + if (!memcmp(&le->ref, &ref, sizeof(ref))) + continue; + + mi = ni_find_mi(ni, ino_get(&le->ref)); + + attr = mi_find_attr(mi, NULL, le->type, le_name(le), + le->name_len, &le->id); + asize = le32_to_cpu(attr->size); + + /* insert into primary record */ + attr_ins = mi_insert_attr(&ni->mi, le->type, le_name(le), + le->name_len, asize, + le16_to_cpu(attr->name_off)); + id = attr_ins->id; + + /* copy all except id */ + memcpy(attr_ins, attr, asize); + attr_ins->id = id; + + /* remove from original record */ + mi_remove_attr(mi, attr); + } + + run_deallocate(sbi, &ni->attr_list.run, true); + run_close(&ni->attr_list.run); + ni->attr_list.size = 0; + ntfs_free(ni->attr_list.le); + ni->attr_list.le = NULL; + ni->attr_list.dirty = false; + + return 0; +} + +/* + * ni_create_attr_list + * + * generates an attribute list for this primary record + */ +int ni_create_attr_list(struct ntfs_inode *ni) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + int err; + u32 lsize; + struct ATTRIB *attr; + struct ATTRIB *arr_move[7]; + struct ATTR_LIST_ENTRY *le, *le_b[7]; + struct MFT_REC *rec; + bool is_mft; + CLST rno = 0; + struct mft_inode *mi; + u32 free_b, nb, to_free, rs; + u16 sz; + + is_mft = ni->mi.rno == MFT_REC_MFT; + rec = ni->mi.mrec; + rs = sbi->record_size; + + /* + * Skip estimating exact memory requirement + * Looks like one record_size is always enough + */ + le = ntfs_malloc(al_aligned(rs)); + if (!le) { + err = -ENOMEM; + goto out; + } + + mi_get_ref(&ni->mi, &le->ref); + ni->attr_list.le = le; + + attr = NULL; + nb = 0; + free_b = 0; + attr = NULL; + + for (; (attr = mi_enum_attr(&ni->mi, attr)); le = Add2Ptr(le, sz)) { + sz = le_size(attr->name_len); + le->type = attr->type; + le->size = cpu_to_le16(sz); + le->name_len = attr->name_len; + le->name_off = offsetof(struct ATTR_LIST_ENTRY, name); + le->vcn = 0; + if (le != ni->attr_list.le) + le->ref = ni->attr_list.le->ref; + le->id = attr->id; + + if (attr->name_len) + memcpy(le->name, attr_name(attr), + sizeof(short) * attr->name_len); + else if (attr->type == ATTR_STD) + continue; + else if (attr->type == ATTR_LIST) + continue; + else if (is_mft && attr->type == ATTR_DATA) + continue; + + if (!nb || nb < ARRAY_SIZE(arr_move)) { + le_b[nb] = le; + arr_move[nb++] = attr; + free_b += le32_to_cpu(attr->size); + } + } + + lsize = PtrOffset(ni->attr_list.le, le); + ni->attr_list.size = lsize; + + to_free = le32_to_cpu(rec->used) + lsize + SIZEOF_RESIDENT; + if (to_free <= rs) { + to_free = 0; + } else { + to_free -= rs; + + if (to_free > free_b) { + err = -EINVAL; + goto out1; + } + } + + /* Allocate child mft. */ + err = ntfs_look_free_mft(sbi, &rno, is_mft, ni, &mi); + if (err) + goto out1; + + /* Call 'mi_remove_attr' in reverse order to keep pointers 'arr_move' valid */ + while (to_free > 0) { + struct ATTRIB *b = arr_move[--nb]; + u32 asize = le32_to_cpu(b->size); + u16 name_off = le16_to_cpu(b->name_off); + + attr = mi_insert_attr(mi, b->type, Add2Ptr(b, name_off), + b->name_len, asize, name_off); + WARN_ON(!attr); + + mi_get_ref(mi, &le_b[nb]->ref); + le_b[nb]->id = attr->id; + + /* copy all except id */ + memcpy(attr, b, asize); + attr->id = le_b[nb]->id; + + WARN_ON(!mi_remove_attr(&ni->mi, b)); + + if (to_free <= asize) + break; + to_free -= asize; + WARN_ON(!nb); + } + + attr = mi_insert_attr(&ni->mi, ATTR_LIST, NULL, 0, + lsize + SIZEOF_RESIDENT, SIZEOF_RESIDENT); + WARN_ON(!attr); + + attr->non_res = 0; + attr->flags = 0; + attr->res.data_size = cpu_to_le32(lsize); + attr->res.data_off = SIZEOF_RESIDENT_LE; + attr->res.flags = 0; + attr->res.res = 0; + + memcpy(resident_data_ex(attr, lsize), ni->attr_list.le, lsize); + + ni->attr_list.dirty = false; + + mark_inode_dirty(&ni->vfs_inode); + goto out; + +out1: + ntfs_free(ni->attr_list.le); + ni->attr_list.le = NULL; + ni->attr_list.size = 0; + +out: + return err; +} + +/* + * ni_ins_attr_ext + * + * This method adds an external attribute to the ntfs_inode. + */ +static int ni_ins_attr_ext(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, + enum ATTR_TYPE type, const __le16 *name, u8 name_len, + u32 asize, CLST svcn, u16 name_off, bool force_ext, + struct ATTRIB **ins_attr, struct mft_inode **ins_mi) +{ + struct ATTRIB *attr; + struct mft_inode *mi; + CLST rno; + u64 vbo; + struct rb_node *node; + int err; + bool is_mft, is_mft_data; + struct ntfs_sb_info *sbi = ni->mi.sbi; + + is_mft = ni->mi.rno == MFT_REC_MFT; + is_mft_data = is_mft && type == ATTR_DATA && !name_len; + + if (asize > sbi->max_bytes_per_attr) { + err = -EINVAL; + goto out; + } + + /* + * standard information and attr_list cannot be made external. + * The Log File cannot have any external attributes + */ + if (type == ATTR_STD || type == ATTR_LIST || + ni->mi.rno == MFT_REC_LOG) { + err = -EINVAL; + goto out; + } + + /* Create attribute list if it is not already existed */ + if (!ni->attr_list.size) { + err = ni_create_attr_list(ni); + if (err) + goto out; + } + + vbo = is_mft_data ? ((u64)svcn << sbi->cluster_bits) : 0; + + if (force_ext) + goto insert_ext; + + /* Load all subrecords into memory. */ + err = ni_load_all_mi(ni); + if (err) + goto out; + + /* Check each of loaded subrecord */ + for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) { + mi = rb_entry(node, struct mft_inode, node); + + if (is_mft_data && + (mi_enum_attr(mi, NULL) || + vbo <= ((u64)mi->rno << sbi->record_bits))) { + /* We can't accept this record 'case MFT's bootstrapping */ + continue; + } + if (is_mft && + mi_find_attr(mi, NULL, ATTR_DATA, NULL, 0, NULL)) { + /* + * This child record already has a ATTR_DATA. + * So it can't accept any other records. + */ + continue; + } + + if ((type != ATTR_NAME || name_len) && + mi_find_attr(mi, NULL, type, name, name_len, NULL)) { + /* Only indexed attributes can share same record */ + continue; + } + + /* Try to insert attribute into this subrecord */ + attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize, + name_off, svcn); + if (!attr) + continue; + + if (ins_attr) + *ins_attr = attr; + return 0; + } + +insert_ext: + /* We have to allocate a new child subrecord*/ + err = ntfs_look_free_mft(sbi, &rno, is_mft_data, ni, &mi); + if (err) + goto out; + + if (is_mft_data && vbo <= ((u64)rno << sbi->record_bits)) { + err = -EINVAL; + goto out1; + } + + attr = ni_ins_new_attr(ni, mi, le, type, name, name_len, asize, + name_off, svcn); + if (!attr) + goto out2; + + if (ins_attr) + *ins_attr = attr; + if (ins_mi) + *ins_mi = mi; + + return 0; + +out2: + ni_remove_mi(ni, mi); + mi_put(mi); + err = -EINVAL; + +out1: + ntfs_mark_rec_free(sbi, rno); + +out: + return err; +} + +/* + * ni_insert_attr + * + * inserts an attribute into the file. + * + * If the primary record has room, it will just insert the attribute. + * If not, it may make the attribute external. + * For $MFT::Data it may make room for the attribute by + * making other attributes external. + * + * NOTE: + * The ATTR_LIST and ATTR_STD cannot be made external. + * This function does not fill new attribute full + * It only fills 'size'/'type'/'id'/'name_len' fields + */ +static int ni_insert_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, u32 asize, + u16 name_off, CLST svcn, struct ATTRIB **ins_attr, + struct mft_inode **ins_mi) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + int err; + struct ATTRIB *attr, *eattr; + struct MFT_REC *rec; + bool is_mft; + struct ATTR_LIST_ENTRY *le; + u32 list_reserve, max_free, free, used, t32; + __le16 id; + u16 t16; + + is_mft = ni->mi.rno == MFT_REC_MFT; + rec = ni->mi.mrec; + + list_reserve = SIZEOF_NONRESIDENT + 3 * (1 + 2 * sizeof(u32)); + used = le32_to_cpu(rec->used); + free = sbi->record_size - used; + + if (is_mft && type != ATTR_LIST) { + /* Reserve space for the ATTRIB List. */ + if (free < list_reserve) + free = 0; + else + free -= list_reserve; + } + + if (asize <= free) { + attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len, + asize, name_off, svcn); + if (attr) { + if (ins_attr) + *ins_attr = attr; + if (ins_mi) + *ins_mi = &ni->mi; + err = 0; + goto out; + } + } + + if (!is_mft || type != ATTR_DATA || svcn) { + /* This ATTRIB will be external. */ + err = ni_ins_attr_ext(ni, NULL, type, name, name_len, asize, + svcn, name_off, false, ins_attr, ins_mi); + goto out; + } + + /* + * Here we have: "is_mft && type == ATTR_DATA && !svcn + * + * The first chunk of the $MFT::Data ATTRIB must be the base record. + * Evict as many other attributes as possible. + */ + max_free = free; + + /* Estimate the result of moving all possible attributes away.*/ + attr = NULL; + + while ((attr = mi_enum_attr(&ni->mi, attr))) { + if (attr->type == ATTR_STD) + continue; + if (attr->type == ATTR_LIST) + continue; + max_free += le32_to_cpu(attr->size); + } + + if (max_free < asize + list_reserve) { + /* Impossible to insert this attribute into primary record */ + err = -EINVAL; + goto out; + } + + /* Start real attribute moving */ + attr = NULL; + + for (;;) { + attr = mi_enum_attr(&ni->mi, attr); + if (!attr) { + /* We should never be here 'cause we have already check this case */ + err = -EINVAL; + goto out; + } + + /* Skip attributes that MUST be primary record */ + if (attr->type == ATTR_STD || attr->type == ATTR_LIST) + continue; + + le = NULL; + if (ni->attr_list.size) { + le = al_find_le(ni, NULL, attr); + if (!le) { + /* Really this is a serious bug */ + err = -EINVAL; + goto out; + } + } + + t32 = le32_to_cpu(attr->size); + t16 = le16_to_cpu(attr->name_off); + err = ni_ins_attr_ext(ni, le, attr->type, Add2Ptr(attr, t16), + attr->name_len, t32, attr_svcn(attr), t16, + false, &eattr, NULL); + if (err) + return err; + + id = eattr->id; + memcpy(eattr, attr, t32); + eattr->id = id; + + /* remove attrib from primary record */ + mi_remove_attr(&ni->mi, attr); + + /* attr now points to next attribute */ + if (attr->type == ATTR_END) + goto out; + } + while (asize + list_reserve > sbi->record_size - le32_to_cpu(rec->used)) + ; + + attr = ni_ins_new_attr(ni, &ni->mi, NULL, type, name, name_len, asize, + name_off, svcn); + if (!attr) { + err = -EINVAL; + goto out; + } + + if (ins_attr) + *ins_attr = attr; + if (ins_mi) + *ins_mi = &ni->mi; + +out: + return err; +} + +/* + * ni_expand_mft_list + * + * This method splits ATTR_DATA of $MFT + */ +static int ni_expand_mft_list(struct ntfs_inode *ni) +{ + int err = 0; + struct runs_tree *run = &ni->file.run; + u32 asize, run_size, done = 0; + struct ATTRIB *attr; + struct rb_node *node; + CLST mft_min, mft_new, svcn, evcn, plen; + struct mft_inode *mi, *mi_min, *mi_new; + struct ntfs_sb_info *sbi = ni->mi.sbi; + + /* Find the nearest Mft */ + mft_min = 0; + mft_new = 0; + mi_min = NULL; + + for (node = rb_first(&ni->mi_tree); node; node = rb_next(node)) { + mi = rb_entry(node, struct mft_inode, node); + + attr = mi_enum_attr(mi, NULL); + + if (!attr) { + mft_min = mi->rno; + mi_min = mi; + break; + } + } + + if (ntfs_look_free_mft(sbi, &mft_new, true, ni, &mi_new)) { + mft_new = 0; + // really this is not critical + } else if (mft_min > mft_new) { + mft_min = mft_new; + mi_min = mi_new; + } else { + ntfs_mark_rec_free(sbi, mft_new); + mft_new = 0; + ni_remove_mi(ni, mi_new); + } + + attr = mi_find_attr(&ni->mi, NULL, ATTR_DATA, NULL, 0, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + asize = le32_to_cpu(attr->size); + + evcn = le64_to_cpu(attr->nres.evcn); + svcn = bytes_to_cluster(sbi, (u64)(mft_min + 1) << sbi->record_bits); + if (evcn + 1 >= svcn) { + err = -EINVAL; + goto out; + } + + /* + * split primary attribute [0 evcn] in two parts [0 svcn) + [svcn evcn] + * + * Update first part of ATTR_DATA in 'primary MFT + */ + err = run_pack(run, 0, svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT), + asize - SIZEOF_NONRESIDENT, &plen); + if (err < 0) + goto out; + + run_size = QuadAlign(err); + err = 0; + + if (plen < svcn) { + err = -EINVAL; + goto out; + } + + attr->nres.evcn = cpu_to_le64(svcn - 1); + attr->size = cpu_to_le32(run_size + SIZEOF_NONRESIDENT); + /* 'done' - how many bytes of primary MFT becomes free */ + done = asize - run_size - SIZEOF_NONRESIDENT; + le32_sub_cpu(&ni->mi.mrec->used, done); + + /* Estimate the size of second part: run_buf=NULL */ + err = run_pack(run, svcn, evcn + 1 - svcn, NULL, sbi->record_size, + &plen); + if (err < 0) + goto out; + + run_size = QuadAlign(err); + err = 0; + + if (plen < evcn + 1 - svcn) { + err = -EINVAL; + goto out; + } + + /* + * This function may implicitly call expand attr_list + * Insert second part of ATTR_DATA in 'mi_min' + */ + attr = ni_ins_new_attr(ni, mi_min, NULL, ATTR_DATA, NULL, 0, + SIZEOF_NONRESIDENT + run_size, + SIZEOF_NONRESIDENT, svcn); + if (!attr) { + err = -EINVAL; + goto out; + } + + attr->non_res = 1; + attr->name_off = SIZEOF_NONRESIDENT_LE; + attr->flags = 0; + + run_pack(run, svcn, evcn + 1 - svcn, Add2Ptr(attr, SIZEOF_NONRESIDENT), + run_size, &plen); + + attr->nres.svcn = cpu_to_le64(svcn); + attr->nres.evcn = cpu_to_le64(evcn); + attr->nres.run_off = cpu_to_le16(SIZEOF_NONRESIDENT); + +out: + if (mft_new) { + ntfs_mark_rec_free(sbi, mft_new); + ni_remove_mi(ni, mi_new); + } + + return !err && !done ? -EOPNOTSUPP : err; +} + +/* + * ni_expand_list + * + * This method moves all possible attributes out of primary record + */ +int ni_expand_list(struct ntfs_inode *ni) +{ + int err = 0; + u32 asize, done = 0; + struct ATTRIB *attr, *ins_attr; + struct ATTR_LIST_ENTRY *le; + bool is_mft = ni->mi.rno == MFT_REC_MFT; + struct MFT_REF ref; + + mi_get_ref(&ni->mi, &ref); + le = NULL; + + while ((le = al_enumerate(ni, le))) { + if (le->type == ATTR_STD) + continue; + + if (memcmp(&ref, &le->ref, sizeof(struct MFT_REF))) + continue; + + if (is_mft && le->type == ATTR_DATA) + continue; + + /* Find attribute in primary record */ + attr = rec_find_attr_le(&ni->mi, le); + if (!attr) { + err = -EINVAL; + goto out; + } + + asize = le32_to_cpu(attr->size); + + /* Always insert into new record to avoid collisions (deep recursive) */ + err = ni_ins_attr_ext(ni, le, attr->type, attr_name(attr), + attr->name_len, asize, attr_svcn(attr), + le16_to_cpu(attr->name_off), true, + &ins_attr, NULL); + + if (err) + goto out; + + memcpy(ins_attr, attr, asize); + ins_attr->id = le->id; + mi_remove_attr(&ni->mi, attr); + + done += asize; + goto out; + } + + if (!is_mft) { + err = -EFBIG; /* attr list is too big(?) */ + goto out; + } + + /* split mft data as much as possible */ + err = ni_expand_mft_list(ni); + if (err) + goto out; + +out: + return !err && !done ? -EOPNOTSUPP : err; +} + +/* + * ni_insert_nonresident + * + * inserts new nonresident attribute + */ +int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, + const struct runs_tree *run, CLST svcn, CLST len, + __le16 flags, struct ATTRIB **new_attr, + struct mft_inode **mi) +{ + int err; + CLST plen; + struct ATTRIB *attr; + bool is_ext = + (flags & (ATTR_FLAG_SPARSED | ATTR_FLAG_COMPRESSED)) && !svcn; + u32 name_size = QuadAlign(name_len * sizeof(short)); + u32 name_off = is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT; + u32 run_off = name_off + name_size; + u32 run_size, asize; + struct ntfs_sb_info *sbi = ni->mi.sbi; + + err = run_pack(run, svcn, len, NULL, sbi->max_bytes_per_attr - run_off, + &plen); + if (err < 0) + goto out; + + run_size = QuadAlign(err); + + if (plen < len) { + err = -EINVAL; + goto out; + } + + asize = run_off + run_size; + + if (asize > sbi->max_bytes_per_attr) { + err = -EINVAL; + goto out; + } + + err = ni_insert_attr(ni, type, name, name_len, asize, name_off, svcn, + &attr, mi); + + if (err) + goto out; + + attr->non_res = 1; + attr->name_off = cpu_to_le16(name_off); + attr->flags = flags; + + run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size, &plen); + + attr->nres.svcn = cpu_to_le64(svcn); + attr->nres.evcn = cpu_to_le64((u64)svcn + len - 1); + + err = 0; + if (new_attr) + *new_attr = attr; + + *(__le64 *)&attr->nres.run_off = cpu_to_le64(run_off); + + attr->nres.alloc_size = + svcn ? 0 : cpu_to_le64((u64)len << ni->mi.sbi->cluster_bits); + attr->nres.data_size = attr->nres.alloc_size; + attr->nres.valid_size = attr->nres.alloc_size; + + if (is_ext) { + if (flags & ATTR_FLAG_COMPRESSED) + attr->nres.c_unit = COMPRESSION_UNIT; + attr->nres.total_size = attr->nres.alloc_size; + } + +out: + return err; +} + +/* + * ni_insert_resident + * + * inserts new resident attribute + */ +int ni_insert_resident(struct ntfs_inode *ni, u32 data_size, + enum ATTR_TYPE type, const __le16 *name, u8 name_len, + struct ATTRIB **new_attr, struct mft_inode **mi) +{ + int err; + u32 name_size = QuadAlign(name_len * sizeof(short)); + u32 asize = SIZEOF_RESIDENT + name_size + QuadAlign(data_size); + struct ATTRIB *attr; + + err = ni_insert_attr(ni, type, name, name_len, asize, SIZEOF_RESIDENT, + 0, &attr, mi); + if (err) + return err; + + attr->non_res = 0; + attr->flags = 0; + + attr->res.data_size = cpu_to_le32(data_size); + attr->res.data_off = cpu_to_le16(SIZEOF_RESIDENT + name_size); + if (type == ATTR_NAME) + attr->res.flags = RESIDENT_FLAG_INDEXED; + attr->res.res = 0; + + if (new_attr) + *new_attr = attr; + + return 0; +} + +/* + * ni_remove_attr_le + * + * removes attribute from record + */ +int ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY *le) +{ + int err; + struct mft_inode *mi; + + err = ni_load_mi(ni, le, &mi); + if (err) + return err; + + mi_remove_attr(mi, attr); + + if (le) + al_remove_le(ni, le); + + return 0; +} + +/* + * ni_delete_all + * + * removes all attributes and frees allocates space + * ntfs_evict_inode->ntfs_clear_inode->ni_delete_all (if no links) + */ +int ni_delete_all(struct ntfs_inode *ni) +{ + int err; + struct ATTR_LIST_ENTRY *le = NULL; + struct ATTRIB *attr = NULL; + struct rb_node *node; + u16 roff; + u32 asize; + CLST svcn, evcn; + struct ntfs_sb_info *sbi = ni->mi.sbi; + bool nt3 = is_ntfs3(sbi); + struct MFT_REF ref; + + while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) { + if (!nt3 || attr->name_len) { + ; + } else if (attr->type == ATTR_REPARSE) { + mi_get_ref(&ni->mi, &ref); + ntfs_remove_reparse(sbi, 0, &ref); + } else if (attr->type == ATTR_ID && !attr->non_res && + le32_to_cpu(attr->res.data_size) >= + sizeof(struct GUID)) { + ntfs_objid_remove(sbi, resident_data(attr)); + } + + if (!attr->non_res) + continue; + + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + + if (evcn + 1 <= svcn) + continue; + + asize = le32_to_cpu(attr->size); + roff = le16_to_cpu(attr->nres.run_off); + + /*run==1 means unpack and deallocate*/ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), asize - roff); + } + + if (ni->attr_list.size) { + run_deallocate(ni->mi.sbi, &ni->attr_list.run, true); + al_destroy(ni); + } + + /* Free all subrecords */ + for (node = rb_first(&ni->mi_tree); node;) { + struct rb_node *next = rb_next(node); + struct mft_inode *mi = rb_entry(node, struct mft_inode, node); + + clear_rec_inuse(mi->mrec); + mi->dirty = true; + mi_write(mi, 0); + + ntfs_mark_rec_free(sbi, mi->rno); + ni_remove_mi(ni, mi); + mi_put(mi); + node = next; + } + + // Free base record + clear_rec_inuse(ni->mi.mrec); + ni->mi.dirty = true; + err = mi_write(&ni->mi, 0); + + ntfs_mark_rec_free(sbi, ni->mi.rno); + + return err; +} + +/* + * ni_fname_name + * + * returns file name attribute by its value + */ +struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, + const struct cpu_str *uni, + const struct MFT_REF *home_dir, + struct ATTR_LIST_ENTRY **le) +{ + struct ATTRIB *attr = NULL; + struct ATTR_FILE_NAME *fname; + + *le = NULL; + + /* Enumerate all names */ +next: + attr = ni_find_attr(ni, attr, le, ATTR_NAME, NULL, 0, NULL, NULL); + if (!attr) + return NULL; + + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (!fname) + goto next; + + if (home_dir && memcmp(home_dir, &fname->home, sizeof(*home_dir))) + goto next; + + if (!uni) + goto next; + + if (uni->len != fname->name_len) + goto next; + + if (ntfs_cmp_names_cpu(uni, (struct le_str *)&fname->name_len, NULL, + false)) + goto next; + + return fname; +} + +/* + * ni_fname_type + * + * returns file name attribute with given type + */ +struct ATTR_FILE_NAME *ni_fname_type(struct ntfs_inode *ni, u8 name_type, + struct ATTR_LIST_ENTRY **le) +{ + struct ATTRIB *attr = NULL; + struct ATTR_FILE_NAME *fname; + + *le = NULL; + + /* Enumerate all names */ + for (;;) { + attr = ni_find_attr(ni, attr, le, ATTR_NAME, NULL, 0, NULL, + NULL); + if (!attr) + return NULL; + + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (fname && name_type == fname->type) + return fname; + } +} + +/* + * Process compressed/sparsed in special way + * NOTE: you need to set ni->std_fa = new_fa + * after this function to keep internal structures in consistency + */ +int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa) +{ + struct ATTRIB *attr; + struct mft_inode *mi; + __le16 new_aflags; + u32 new_asize; + + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi); + if (!attr) + return -EINVAL; + + new_aflags = attr->flags; + + if (new_fa & FILE_ATTRIBUTE_SPARSE_FILE) + new_aflags |= ATTR_FLAG_SPARSED; + else + new_aflags &= ~ATTR_FLAG_SPARSED; + + if (new_fa & FILE_ATTRIBUTE_COMPRESSED) + new_aflags |= ATTR_FLAG_COMPRESSED; + else + new_aflags &= ~ATTR_FLAG_COMPRESSED; + + if (new_aflags == attr->flags) + return 0; + + if ((new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) == + (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) { + ntfs_inode_warn(&ni->vfs_inode, + "file can't be sparsed and compressed"); + return -EOPNOTSUPP; + } + + if (!attr->non_res) + goto out; + + if (attr->nres.data_size) { + ntfs_inode_warn( + &ni->vfs_inode, + "one can change sparsed/compressed only for empty files"); + return -EOPNOTSUPP; + } + + /* resize nonresident empty attribute in-place only*/ + new_asize = (new_aflags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) + ? (SIZEOF_NONRESIDENT_EX + 8) + : (SIZEOF_NONRESIDENT + 8); + + if (!mi_resize_attr(mi, attr, new_asize - le32_to_cpu(attr->size))) + return -EOPNOTSUPP; + + if (new_aflags & ATTR_FLAG_SPARSED) { + attr->name_off = SIZEOF_NONRESIDENT_EX_LE; + /* windows uses 16 clusters per frame but supports one cluster per frame too*/ + attr->nres.c_unit = 0; + ni->vfs_inode.i_mapping->a_ops = &ntfs_aops; + } else if (new_aflags & ATTR_FLAG_COMPRESSED) { + attr->name_off = SIZEOF_NONRESIDENT_EX_LE; + /* the only allowed: 16 clusters per frame */ + attr->nres.c_unit = NTFS_LZNT_CUNIT; + ni->vfs_inode.i_mapping->a_ops = &ntfs_aops_cmpr; + } else { + attr->name_off = SIZEOF_NONRESIDENT_LE; + /* normal files */ + attr->nres.c_unit = 0; + ni->vfs_inode.i_mapping->a_ops = &ntfs_aops; + } + attr->nres.run_off = attr->name_off; +out: + attr->flags = new_aflags; + mi->dirty = true; + + return 0; +} + +/* + * ni_parse_reparse + * + * buffer is at least 24 bytes + */ +enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, + void *buffer) +{ + const struct REPARSE_DATA_BUFFER *rp = NULL; + u8 bits; + u16 len; + typeof(rp->CompressReparseBuffer) *cmpr; + + static_assert(sizeof(struct REPARSE_DATA_BUFFER) <= 24); + + /* Try to estimate reparse point */ + if (!attr->non_res) { + rp = resident_data_ex(attr, sizeof(struct REPARSE_DATA_BUFFER)); + } else if (le64_to_cpu(attr->nres.data_size) >= + sizeof(struct REPARSE_DATA_BUFFER)) { + struct runs_tree run; + + run_init(&run); + + if (!attr_load_runs_vcn(ni, ATTR_REPARSE, NULL, 0, &run, 0) && + !ntfs_read_run_nb(ni->mi.sbi, &run, 0, buffer, + sizeof(struct REPARSE_DATA_BUFFER), + NULL)) { + rp = buffer; + } + + run_close(&run); + } + + if (!rp) + return REPARSE_NONE; + + len = le16_to_cpu(rp->ReparseDataLength); + switch (rp->ReparseTag) { + case (IO_REPARSE_TAG_MICROSOFT | IO_REPARSE_TAG_SYMBOLIC_LINK): + break; /* Symbolic link */ + case IO_REPARSE_TAG_MOUNT_POINT: + break; /* Mount points and junctions */ + case IO_REPARSE_TAG_SYMLINK: + break; + case IO_REPARSE_TAG_COMPRESS: + /* + * WOF - Windows Overlay Filter - used to compress files with lzx/xpress + * Unlike native NTFS file compression, the Windows Overlay Filter supports + * only read operations. This means that it doesn’t need to sector-align each + * compressed chunk, so the compressed data can be packed more tightly together. + * If you open the file for writing, the Windows Overlay Filter just decompresses + * the entire file, turning it back into a plain file. + * + * ntfs3 driver decompresses the entire file only on write or change size requests + */ + + cmpr = &rp->CompressReparseBuffer; + if (len < sizeof(*cmpr) || + cmpr->WofVersion != WOF_CURRENT_VERSION || + cmpr->WofProvider != WOF_PROVIDER_SYSTEM || + cmpr->ProviderVer != WOF_PROVIDER_CURRENT_VERSION) { + return REPARSE_NONE; + } + + switch (cmpr->CompressionFormat) { + case WOF_COMPRESSION_XPRESS4K: + bits = 0xc; // 4k + break; + case WOF_COMPRESSION_XPRESS8K: + bits = 0xd; // 8k + break; + case WOF_COMPRESSION_XPRESS16K: + bits = 0xe; // 16k + break; + case WOF_COMPRESSION_LZX32K: + bits = 0xf; // 32k + break; + default: + bits = 0x10; // 64k + break; + } + ni_set_ext_compress_bits(ni, bits); + return REPARSE_COMPRESSED; + + case IO_REPARSE_TAG_DEDUP: + ni->ni_flags |= NI_FLAG_DEDUPLICATED; + return REPARSE_DEDUPLICATED; + + default: + if (rp->ReparseTag & IO_REPARSE_TAG_NAME_SURROGATE) + break; + + return REPARSE_NONE; + } + + /* Looks like normal symlink */ + return REPARSE_LINK; +} + +/* + * helper for file_fiemap + * assumed ni_lock + * TODO: less aggressive locks + */ +int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + __u64 vbo, __u64 len) +{ + int err = 0; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + struct runs_tree *run; + struct rw_semaphore *run_lock; + struct ATTRIB *attr; + CLST vcn = vbo >> cluster_bits; + CLST lcn, clen; + u64 valid = ni->i_valid; + u64 lbo, bytes; + u64 end, alloc_size; + size_t idx = -1; + u32 flags; + bool ok; + + if (S_ISDIR(ni->vfs_inode.i_mode)) { + run = &ni->dir.alloc_run; + attr = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, I30_NAME, + ARRAY_SIZE(I30_NAME), NULL, NULL); + run_lock = &ni->dir.run_lock; + } else { + run = &ni->file.run; + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, + NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + if (is_attr_compressed(attr)) { + /*unfortunately cp -r incorrectly treats compressed clusters*/ + err = -EOPNOTSUPP; + ntfs_inode_warn( + &ni->vfs_inode, + "fiemap is not supported for compressed file (cp -r)"); + goto out; + } + run_lock = &ni->file.run_lock; + } + + if (!attr || !attr->non_res) { + err = fiemap_fill_next_extent( + fieinfo, 0, 0, + attr ? le32_to_cpu(attr->res.data_size) : 0, + FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_LAST | + FIEMAP_EXTENT_MERGED); + goto out; + } + + end = vbo + len; + alloc_size = le64_to_cpu(attr->nres.alloc_size); + if (end > alloc_size) + end = alloc_size; + + down_read(run_lock); + + while (vbo < end) { + if (idx == -1) { + ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + } else { + CLST vcn_next = vcn; + + ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && + vcn == vcn_next; + if (!ok) + vcn = vcn_next; + } + + if (!ok) { + up_read(run_lock); + down_write(run_lock); + + err = attr_load_runs_vcn(ni, attr->type, + attr_name(attr), + attr->name_len, run, vcn); + + up_write(run_lock); + down_read(run_lock); + + if (err) + break; + + ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + + if (!ok) { + err = -EINVAL; + break; + } + } + + if (!clen) { + err = -EINVAL; // ? + break; + } + + if (lcn == SPARSE_LCN) { + vcn += clen; + vbo = (u64)vcn << cluster_bits; + continue; + } + + flags = FIEMAP_EXTENT_MERGED; + if (S_ISDIR(ni->vfs_inode.i_mode)) { + ; + } else if (is_attr_compressed(attr)) { + CLST clst_data; + + err = attr_is_frame_compressed( + ni, attr, vcn >> attr->nres.c_unit, &clst_data); + if (err) + break; + if (clst_data < NTFS_LZNT_CLUSTERS) + flags |= FIEMAP_EXTENT_ENCODED; + } else if (is_attr_encrypted(attr)) { + flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; + } + + vbo = (u64)vcn << cluster_bits; + bytes = (u64)clen << cluster_bits; + lbo = (u64)lcn << cluster_bits; + + vcn += clen; + + if (vbo + bytes >= end) { + bytes = end - vbo; + flags |= FIEMAP_EXTENT_LAST; + } + + if (vbo + bytes <= valid) { + ; + } else if (vbo >= valid) { + flags |= FIEMAP_EXTENT_UNWRITTEN; + } else { + /* vbo < valid && valid < vbo + bytes */ + u64 dlen = valid - vbo; + + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, dlen, + flags); + if (err < 0) + break; + if (err == 1) { + err = 0; + break; + } + + vbo = valid; + bytes -= dlen; + if (!bytes) + continue; + + lbo += dlen; + flags |= FIEMAP_EXTENT_UNWRITTEN; + } + + err = fiemap_fill_next_extent(fieinfo, vbo, lbo, bytes, flags); + if (err < 0) + break; + if (err == 1) { + err = 0; + break; + } + + vbo += bytes; + } + + up_read(run_lock); + +out: + return err; +} + +/* + * When decompressing, we typically obtain more than one page per reference. + * We inject the additional pages into the page cache. + */ +int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page) +{ + int err; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct address_space *mapping = page->mapping; + pgoff_t index = page->index; + u64 frame_vbo, vbo = (u64)index << PAGE_SHIFT; + struct page **pages = NULL; /*array of at most 16 pages. stack?*/ + u8 frame_bits; + CLST frame; + u32 i, idx, frame_size, pages_per_frame; + gfp_t gfp_mask; + struct page *pg; + + if (vbo >= ni->vfs_inode.i_size) { + SetPageUptodate(page); + err = 0; + goto out; + } + + if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) { + /* xpress or lzx */ + frame_bits = ni_ext_compress_bits(ni); + } else { + /* lznt compression*/ + frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; + } + frame_size = 1u << frame_bits; + frame = vbo >> frame_bits; + frame_vbo = (u64)frame << frame_bits; + idx = (vbo - frame_vbo) >> PAGE_SHIFT; + + pages_per_frame = frame_size >> PAGE_SHIFT; + pages = ntfs_zalloc(pages_per_frame * sizeof(struct page *)); + if (!pages) { + err = -ENOMEM; + goto out; + } + + pages[idx] = page; + index = frame_vbo >> PAGE_SHIFT; + gfp_mask = mapping_gfp_mask(mapping); + + for (i = 0; i < pages_per_frame; i++, index++) { + if (i == idx) + continue; + + pg = find_or_create_page(mapping, index, gfp_mask); + if (!pg) { + err = -ENOMEM; + goto out1; + } + pages[i] = pg; + } + + err = ni_read_frame(ni, frame_vbo, pages, pages_per_frame); + +out1: + if (err) + SetPageError(page); + + for (i = 0; i < pages_per_frame; i++) { + pg = pages[i]; + if (i == idx) + continue; + unlock_page(pg); + put_page(pg); + } + +out: + /* At this point, err contains 0 or -EIO depending on the "critical" page */ + ntfs_free(pages); + unlock_page(page); + + return err; +} + +#ifdef CONFIG_NTFS3_LZX_XPRESS +/* + * decompress lzx/xpress compressed file + * remove ATTR_DATA::WofCompressedData + * remove ATTR_REPARSE + */ +int ni_decompress_file(struct ntfs_inode *ni) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct inode *inode = &ni->vfs_inode; + loff_t i_size = inode->i_size; + struct address_space *mapping = inode->i_mapping; + gfp_t gfp_mask = mapping_gfp_mask(mapping); + struct page **pages = NULL; + struct ATTR_LIST_ENTRY *le; + struct ATTRIB *attr; + CLST vcn, cend, lcn, clen, end; + pgoff_t index; + u64 vbo; + u8 frame_bits; + u32 i, frame_size, pages_per_frame, bytes; + struct mft_inode *mi; + int err; + + /* clusters for decompressed data*/ + cend = bytes_to_cluster(sbi, i_size); + + if (!i_size) + goto remove_wof; + + /* check in advance */ + if (cend > wnd_zeroes(&sbi->used.bitmap)) { + err = -ENOSPC; + goto out; + } + + frame_bits = ni_ext_compress_bits(ni); + frame_size = 1u << frame_bits; + pages_per_frame = frame_size >> PAGE_SHIFT; + pages = ntfs_zalloc(pages_per_frame * sizeof(struct page *)); + if (!pages) { + err = -ENOMEM; + goto out; + } + + /* + * Step 1: decompress data and copy to new allocated clusters + */ + index = 0; + for (vbo = 0; vbo < i_size; vbo += bytes) { + u32 nr_pages; + bool new; + + if (vbo + frame_size > i_size) { + bytes = i_size - vbo; + nr_pages = (bytes + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + nr_pages = pages_per_frame; + bytes = frame_size; + } + + end = bytes_to_cluster(sbi, vbo + bytes); + + for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) { + err = attr_data_get_block(ni, vcn, cend - vcn, &lcn, + &clen, &new); + if (err) + goto out; + } + + for (i = 0; i < pages_per_frame; i++, index++) { + struct page *pg; + + pg = find_or_create_page(mapping, index, gfp_mask); + if (!pg) { + while (i--) { + unlock_page(pages[i]); + put_page(pages[i]); + } + err = -ENOMEM; + goto out; + } + pages[i] = pg; + } + + err = ni_read_frame(ni, vbo, pages, pages_per_frame); + + if (!err) { + down_read(&ni->file.run_lock); + err = ntfs_bio_pages(sbi, &ni->file.run, pages, + nr_pages, vbo, bytes, + REQ_OP_WRITE); + up_read(&ni->file.run_lock); + } + + for (i = 0; i < pages_per_frame; i++) { + unlock_page(pages[i]); + put_page(pages[i]); + } + + if (err) + goto out; + + cond_resched(); + } + +remove_wof: + /* + * Step 2: deallocate attributes ATTR_DATA::WofCompressedData and ATTR_REPARSE + */ + attr = NULL; + le = NULL; + while ((attr = ni_enum_attr_ex(ni, attr, &le, NULL))) { + CLST svcn, evcn; + u32 asize, roff; + + if (attr->type == ATTR_REPARSE) { + struct MFT_REF ref; + + mi_get_ref(&ni->mi, &ref); + ntfs_remove_reparse(sbi, 0, &ref); + } + + if (!attr->non_res) + continue; + + if (attr->type != ATTR_REPARSE && + (attr->type != ATTR_DATA || + attr->name_len != ARRAY_SIZE(WOF_NAME) || + memcmp(attr_name(attr), WOF_NAME, sizeof(WOF_NAME)))) + continue; + + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + + if (evcn + 1 <= svcn) + continue; + + asize = le32_to_cpu(attr->size); + roff = le16_to_cpu(attr->nres.run_off); + + /*run==1 means unpack and deallocate*/ + run_unpack_ex(RUN_DEALLOCATE, sbi, ni->mi.rno, svcn, evcn, svcn, + Add2Ptr(attr, roff), asize - roff); + } + + /* + * Step 3: remove attribute ATTR_DATA::WofCompressedData + */ + err = ni_remove_attr(ni, ATTR_DATA, WOF_NAME, ARRAY_SIZE(WOF_NAME), + false, NULL); + if (err) + goto out; + + /* + * Step 4: remove ATTR_REPARSE + */ + err = ni_remove_attr(ni, ATTR_REPARSE, NULL, 0, false, NULL); + if (err) + goto out; + + /* + * Step 5: remove sparse flag from data attribute + */ + attr = ni_find_attr(ni, NULL, NULL, ATTR_DATA, NULL, 0, NULL, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + if (attr->non_res && is_attr_sparsed(attr)) { + /* sparsed attribute header is 8 bytes bigger than normal*/ + struct MFT_REC *rec = mi->mrec; + u32 used = le32_to_cpu(rec->used); + u32 asize = le32_to_cpu(attr->size); + u16 roff = le16_to_cpu(attr->nres.run_off); + char *rbuf = Add2Ptr(attr, roff); + + memmove(rbuf - 8, rbuf, used - PtrOffset(rec, rbuf)); + attr->size = cpu_to_le32(asize - 8); + attr->flags &= ~ATTR_FLAG_SPARSED; + attr->nres.run_off = cpu_to_le16(roff - 8); + attr->nres.c_unit = 0; + rec->used = cpu_to_le32(used - 8); + mi->dirty = true; + ni->std_fa &= ~(FILE_ATTRIBUTE_SPARSE_FILE | + FILE_ATTRIBUTE_REPARSE_POINT); + + mark_inode_dirty(inode); + } + + /* clear cached flag */ + ni->ni_flags &= ~NI_FLAG_COMPRESSED_MASK; + if (ni->file.offs_page) { + put_page(ni->file.offs_page); + ni->file.offs_page = NULL; + } + mapping->a_ops = &ntfs_aops; + +out: + ntfs_free(pages); + if (err) { + make_bad_inode(inode); + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } + + return err; +} + +/* external compression lzx/xpress */ +static int decompress_lzx_xpress(struct ntfs_sb_info *sbi, const char *cmpr, + size_t cmpr_size, void *unc, size_t unc_size, + u32 frame_size) +{ + int err; + void *ctx; + + if (cmpr_size == unc_size) { + /* frame not compressed */ + memcpy(unc, cmpr, unc_size); + return 0; + } + + err = 0; + if (frame_size == 0x8000) { + mutex_lock(&sbi->compress.mtx_lzx); + /* LZX: frame compressed */ + ctx = sbi->compress.lzx; + if (!ctx) { + /* Lazy initialize lzx decompress context */ + ctx = lzx_allocate_decompressor(); + if (!ctx) { + err = -ENOMEM; + goto out1; + } + + sbi->compress.lzx = ctx; + } + + if (lzx_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) { + /* treat all errors as "invalid argument" */ + err = -EINVAL; + } +out1: + mutex_unlock(&sbi->compress.mtx_lzx); + } else { + /* XPRESS: frame compressed */ + mutex_lock(&sbi->compress.mtx_xpress); + ctx = sbi->compress.xpress; + if (!ctx) { + /* Lazy initialize xpress decompress context */ + ctx = xpress_allocate_decompressor(); + if (!ctx) { + err = -ENOMEM; + goto out2; + } + + sbi->compress.xpress = ctx; + } + + if (xpress_decompress(ctx, cmpr, cmpr_size, unc, unc_size)) { + /* treat all errors as "invalid argument" */ + err = -EINVAL; + } +out2: + mutex_unlock(&sbi->compress.mtx_xpress); + } + return err; +} +#endif + +/* + * ni_read_frame + * + * pages - array of locked pages + */ +int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, + u32 pages_per_frame) +{ + int err; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 cluster_bits = sbi->cluster_bits; + char *frame_ondisk = NULL; + char *frame_mem = NULL; + struct page **pages_disk = NULL; + struct ATTR_LIST_ENTRY *le = NULL; + struct runs_tree *run = &ni->file.run; + u64 valid_size = ni->i_valid; + u64 vbo_disk; + size_t unc_size; + u32 frame_size, i, npages_disk, ondisk_size; + struct page *pg; + struct ATTRIB *attr; + CLST frame, clst_data; + + /* + * To simplify decompress algorithm do vmap for source and target pages + */ + for (i = 0; i < pages_per_frame; i++) + kmap(pages[i]); + + frame_size = pages_per_frame << PAGE_SHIFT; + frame_mem = vmap(pages, pages_per_frame, VM_MAP, PAGE_KERNEL); + if (!frame_mem) { + err = -ENOMEM; + goto out; + } + + attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, NULL); + if (!attr) { + err = -ENOENT; + goto out1; + } + + if (!attr->non_res) { + u32 data_size = le32_to_cpu(attr->res.data_size); + + memset(frame_mem, 0, frame_size); + if (frame_vbo < data_size) { + ondisk_size = data_size - frame_vbo; + memcpy(frame_mem, resident_data(attr) + frame_vbo, + min(ondisk_size, frame_size)); + } + err = 0; + goto out1; + } + + if (frame_vbo >= valid_size) { + memset(frame_mem, 0, frame_size); + err = 0; + goto out1; + } + + if (ni->ni_flags & NI_FLAG_COMPRESSED_MASK) { +#ifndef CONFIG_NTFS3_LZX_XPRESS + err = -EOPNOTSUPP; + goto out1; +#else + u32 frame_bits = ni_ext_compress_bits(ni); + u64 frame64 = frame_vbo >> frame_bits; + u64 frames, vbo_data; + + if (frame_size != (1u << frame_bits)) { + err = -EINVAL; + goto out1; + } + switch (frame_size) { + case 0x1000: + case 0x2000: + case 0x4000: + case 0x8000: + break; + default: + /* unknown compression */ + err = -EOPNOTSUPP; + goto out1; + } + + attr = ni_find_attr(ni, attr, &le, ATTR_DATA, WOF_NAME, + ARRAY_SIZE(WOF_NAME), NULL, NULL); + if (!attr) { + ntfs_inode_err( + &ni->vfs_inode, + "external compressed file should contains data attribute \"WofCompressedData\""); + err = -EINVAL; + goto out1; + } + + if (!attr->non_res) { + run = NULL; + } else { + run = run_alloc(); + if (!run) { + err = -ENOMEM; + goto out1; + } + } + + frames = (ni->vfs_inode.i_size - 1) >> frame_bits; + + err = attr_wof_frame_info(ni, attr, run, frame64, frames, + frame_bits, &ondisk_size, &vbo_data); + if (err) + goto out2; + + if (frame64 == frames) { + unc_size = 1 + ((ni->vfs_inode.i_size - 1) & + (frame_size - 1)); + ondisk_size = attr_size(attr) - vbo_data; + } else { + unc_size = frame_size; + } + + if (ondisk_size > frame_size) { + err = -EINVAL; + goto out2; + } + + if (!attr->non_res) { + if (vbo_data + ondisk_size > + le32_to_cpu(attr->res.data_size)) { + err = -EINVAL; + goto out1; + } + + err = decompress_lzx_xpress( + sbi, Add2Ptr(resident_data(attr), vbo_data), + ondisk_size, frame_mem, unc_size, frame_size); + goto out1; + } + vbo_disk = vbo_data; + /* load all runs to read [vbo_disk-vbo_to) */ + err = attr_load_runs_range(ni, ATTR_DATA, WOF_NAME, + ARRAY_SIZE(WOF_NAME), run, vbo_disk, + vbo_data + ondisk_size); + if (err) + goto out2; + npages_disk = (ondisk_size + (vbo_disk & (PAGE_SIZE - 1)) + + PAGE_SIZE - 1) >> + PAGE_SHIFT; +#endif + } else if (is_attr_compressed(attr)) { + /* lznt compression*/ + if (sbi->cluster_size > NTFS_LZNT_MAX_CLUSTER) { + err = -EOPNOTSUPP; + goto out1; + } + + if (attr->nres.c_unit != NTFS_LZNT_CUNIT) { + err = -EOPNOTSUPP; + goto out1; + } + + down_write(&ni->file.run_lock); + run_truncate_around(run, le64_to_cpu(attr->nres.svcn)); + frame = frame_vbo >> (cluster_bits + NTFS_LZNT_CUNIT); + err = attr_is_frame_compressed(ni, attr, frame, &clst_data); + up_write(&ni->file.run_lock); + if (err) + goto out1; + + if (!clst_data) { + memset(frame_mem, 0, frame_size); + goto out1; + } + + frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT; + ondisk_size = clst_data << cluster_bits; + + if (clst_data >= NTFS_LZNT_CLUSTERS) { + /* frame is not compressed */ + down_read(&ni->file.run_lock); + err = ntfs_bio_pages(sbi, run, pages, pages_per_frame, + frame_vbo, ondisk_size, + REQ_OP_READ); + up_read(&ni->file.run_lock); + goto out1; + } + vbo_disk = frame_vbo; + npages_disk = (ondisk_size + PAGE_SIZE - 1) >> PAGE_SHIFT; + } else { + __builtin_unreachable(); + err = -EINVAL; + goto out1; + } + + pages_disk = ntfs_zalloc(npages_disk * sizeof(struct page *)); + if (!pages_disk) { + err = -ENOMEM; + goto out2; + } + + for (i = 0; i < npages_disk; i++) { + pg = alloc_page(GFP_KERNEL); + if (!pg) { + err = -ENOMEM; + goto out3; + } + pages_disk[i] = pg; + lock_page(pg); + kmap(pg); + } + + /* read 'ondisk_size' bytes from disk */ + down_read(&ni->file.run_lock); + err = ntfs_bio_pages(sbi, run, pages_disk, npages_disk, vbo_disk, + ondisk_size, REQ_OP_READ); + up_read(&ni->file.run_lock); + if (err) + goto out3; + + /* + * To simplify decompress algorithm do vmap for source and target pages + */ + frame_ondisk = vmap(pages_disk, npages_disk, VM_MAP, PAGE_KERNEL_RO); + if (!frame_ondisk) { + err = -ENOMEM; + goto out3; + } + + /* decompress: frame_ondisk -> frame_mem */ +#ifdef CONFIG_NTFS3_LZX_XPRESS + if (run != &ni->file.run) { + /* LZX or XPRESS */ + err = decompress_lzx_xpress( + sbi, frame_ondisk + (vbo_disk & (PAGE_SIZE - 1)), + ondisk_size, frame_mem, unc_size, frame_size); + } else +#endif + { + /* LZNT - native ntfs compression */ + unc_size = decompress_lznt(frame_ondisk, ondisk_size, frame_mem, + frame_size); + if ((ssize_t)unc_size < 0) + err = unc_size; + else if (!unc_size || unc_size > frame_size) + err = -EINVAL; + } + if (!err && valid_size < frame_vbo + frame_size) { + size_t ok = valid_size - frame_vbo; + + memset(frame_mem + ok, 0, frame_size - ok); + } + + vunmap(frame_ondisk); + +out3: + for (i = 0; i < npages_disk; i++) { + pg = pages_disk[i]; + if (pg) { + kunmap(pg); + unlock_page(pg); + put_page(pg); + } + } + ntfs_free(pages_disk); + +out2: +#ifdef CONFIG_NTFS3_LZX_XPRESS + if (run != &ni->file.run) + run_free(run); +#endif +out1: + vunmap(frame_mem); +out: + for (i = 0; i < pages_per_frame; i++) { + pg = pages[i]; + kunmap(pg); + ClearPageError(pg); + SetPageUptodate(pg); + } + + return err; +} + +/* + * ni_write_frame + * + * pages - array of locked pages + */ +int ni_write_frame(struct ntfs_inode *ni, struct page **pages, + u32 pages_per_frame) +{ + int err; + struct ntfs_sb_info *sbi = ni->mi.sbi; + u8 frame_bits = NTFS_LZNT_CUNIT + sbi->cluster_bits; + u32 frame_size = sbi->cluster_size << NTFS_LZNT_CUNIT; + u64 frame_vbo = (u64)pages[0]->index << PAGE_SHIFT; + CLST frame = frame_vbo >> frame_bits; + char *frame_ondisk = NULL; + struct page **pages_disk = NULL; + struct ATTR_LIST_ENTRY *le = NULL; + char *frame_mem; + struct ATTRIB *attr; + struct mft_inode *mi; + u32 i; + struct page *pg; + size_t compr_size, ondisk_size; + struct lznt *lznt; + + attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, &mi); + if (!attr) { + err = -ENOENT; + goto out; + } + + if (WARN_ON(!is_attr_compressed(attr))) { + err = -EINVAL; + goto out; + } + + if (sbi->cluster_size > NTFS_LZNT_MAX_CLUSTER) { + err = -EOPNOTSUPP; + goto out; + } + + if (!attr->non_res) { + down_write(&ni->file.run_lock); + err = attr_make_nonresident(ni, attr, le, mi, + le32_to_cpu(attr->res.data_size), + &ni->file.run, &attr, pages[0]); + up_write(&ni->file.run_lock); + if (err) + goto out; + } + + if (attr->nres.c_unit != NTFS_LZNT_CUNIT) { + err = -EOPNOTSUPP; + goto out; + } + + pages_disk = ntfs_zalloc(pages_per_frame * sizeof(struct page *)); + if (!pages_disk) { + err = -ENOMEM; + goto out; + } + + for (i = 0; i < pages_per_frame; i++) { + pg = alloc_page(GFP_KERNEL); + if (!pg) { + err = -ENOMEM; + goto out1; + } + pages_disk[i] = pg; + lock_page(pg); + kmap(pg); + } + + /* + * To simplify compress algorithm do vmap for source and target pages + */ + frame_ondisk = vmap(pages_disk, pages_per_frame, VM_MAP, PAGE_KERNEL); + if (!frame_ondisk) { + err = -ENOMEM; + goto out1; + } + + for (i = 0; i < pages_per_frame; i++) + kmap(pages[i]); + + /* map in-memory frame for read-only */ + frame_mem = vmap(pages, pages_per_frame, VM_MAP, PAGE_KERNEL_RO); + if (!frame_mem) { + err = -ENOMEM; + goto out2; + } + + mutex_lock(&sbi->compress.mtx_lznt); + lznt = NULL; + if (!sbi->compress.lznt) { + /* + * lznt implements two levels of compression: + * 0 - standard compression + * 1 - best compression, requires a lot of cpu + * use mount option? + */ + lznt = get_lznt_ctx(0); + if (!lznt) { + mutex_unlock(&sbi->compress.mtx_lznt); + err = -ENOMEM; + goto out3; + } + + sbi->compress.lznt = lznt; + lznt = NULL; + } + + /* compress: frame_mem -> frame_ondisk */ + compr_size = compress_lznt(frame_mem, frame_size, frame_ondisk, + frame_size, sbi->compress.lznt); + mutex_unlock(&sbi->compress.mtx_lznt); + ntfs_free(lznt); + + if (compr_size + sbi->cluster_size > frame_size) { + /* frame is not compressed */ + compr_size = frame_size; + ondisk_size = frame_size; + } else if (compr_size) { + /* frame is compressed */ + ondisk_size = ntfs_up_cluster(sbi, compr_size); + memset(frame_ondisk + compr_size, 0, ondisk_size - compr_size); + } else { + /* frame is sparsed */ + ondisk_size = 0; + } + + down_write(&ni->file.run_lock); + run_truncate_around(&ni->file.run, le64_to_cpu(attr->nres.svcn)); + err = attr_allocate_frame(ni, frame, compr_size, ni->i_valid); + up_write(&ni->file.run_lock); + if (err) + goto out2; + + if (!ondisk_size) + goto out2; + + down_read(&ni->file.run_lock); + err = ntfs_bio_pages(sbi, &ni->file.run, + ondisk_size < frame_size ? pages_disk : pages, + pages_per_frame, frame_vbo, ondisk_size, + REQ_OP_WRITE); + up_read(&ni->file.run_lock); + +out3: + vunmap(frame_mem); + +out2: + for (i = 0; i < pages_per_frame; i++) + kunmap(pages[i]); + + vunmap(frame_ondisk); +out1: + for (i = 0; i < pages_per_frame; i++) { + pg = pages_disk[i]; + if (pg) { + kunmap(pg); + unlock_page(pg); + put_page(pg); + } + } + ntfs_free(pages_disk); +out: + return err; +} + +/* + * update duplicate info of ATTR_FILE_NAME in MFT and in parent directories + */ +static bool ni_update_parent(struct ntfs_inode *ni, struct NTFS_DUP_INFO *dup, + int sync) +{ + struct ATTRIB *attr; + struct mft_inode *mi; + struct ATTR_LIST_ENTRY *le = NULL; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct super_block *sb = sbi->sb; + bool re_dirty = false; + bool active = sb->s_flags & SB_ACTIVE; + bool upd_parent = ni->ni_flags & NI_FLAG_UPDATE_PARENT; + + if (ni->mi.mrec->flags & RECORD_FLAG_DIR) { + dup->fa |= FILE_ATTRIBUTE_DIRECTORY; + attr = NULL; + dup->alloc_size = 0; + dup->data_size = 0; + } else { + dup->fa &= ~FILE_ATTRIBUTE_DIRECTORY; + + attr = ni_find_attr(ni, NULL, &le, ATTR_DATA, NULL, 0, NULL, + &mi); + if (!attr) { + dup->alloc_size = dup->data_size = 0; + } else if (!attr->non_res) { + u32 data_size = le32_to_cpu(attr->res.data_size); + + dup->alloc_size = cpu_to_le64(QuadAlign(data_size)); + dup->data_size = cpu_to_le64(data_size); + } else { + u64 new_valid = ni->i_valid; + u64 data_size = le64_to_cpu(attr->nres.data_size); + __le64 valid_le; + + dup->alloc_size = is_attr_ext(attr) + ? attr->nres.total_size + : attr->nres.alloc_size; + dup->data_size = attr->nres.data_size; + + if (new_valid > data_size) + new_valid = data_size; + + valid_le = cpu_to_le64(new_valid); + if (valid_le != attr->nres.valid_size) { + attr->nres.valid_size = valid_le; + mi->dirty = true; + } + } + } + + /* TODO: fill reparse info */ + dup->reparse = 0; + dup->ea_size = 0; + + if (ni->ni_flags & NI_FLAG_EA) { + attr = ni_find_attr(ni, attr, &le, ATTR_EA_INFO, NULL, 0, NULL, + NULL); + if (attr) { + const struct EA_INFO *info; + + info = resident_data_ex(attr, sizeof(struct EA_INFO)); + dup->ea_size = info->size_pack; + } + } + + attr = NULL; + le = NULL; + + while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL, + &mi))) { + struct inode *dir; + struct ATTR_FILE_NAME *fname; + + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (!fname) + continue; + + if (memcmp(&fname->dup, dup, sizeof(fname->dup))) { + memcpy(&fname->dup, dup, sizeof(fname->dup)); + mi->dirty = true; + } else if (!upd_parent) { + continue; + } + + if (!active) + continue; /*avoid __wait_on_freeing_inode(inode); */ + + /*ntfs_iget5 may sleep*/ + dir = ntfs_iget5(sb, &fname->home, NULL); + if (IS_ERR(dir)) { + ntfs_inode_warn( + &ni->vfs_inode, + "failed to open parent directory r=%lx to update", + (long)ino_get(&fname->home)); + continue; + } + + if (!is_bad_inode(dir)) { + struct ntfs_inode *dir_ni = ntfs_i(dir); + + if (!ni_trylock(dir_ni)) { + re_dirty = true; + } else { + indx_update_dup(dir_ni, sbi, fname, dup, sync); + ni_unlock(dir_ni); + } + } + iput(dir); + } + + return re_dirty; +} + +/* + * ni_write_inode + * + * write mft base record and all subrecords to disk + */ +int ni_write_inode(struct inode *inode, int sync, const char *hint) +{ + int err = 0, err2; + struct ntfs_inode *ni = ntfs_i(inode); + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + bool re_dirty = false; + struct ATTR_STD_INFO *std; + struct rb_node *node, *next; + struct NTFS_DUP_INFO dup; + + if (is_bad_inode(inode) || sb_rdonly(sb)) + return 0; + + if (!ni_trylock(ni)) { + /* 'ni' is under modification, skip for now */ + mark_inode_dirty_sync(inode); + return 0; + } + + if (is_rec_inuse(ni->mi.mrec) && + !(sbi->flags & NTFS_FLAGS_LOG_REPLAYING) && inode->i_nlink) { + bool modified = false; + + /* update times in standard attribute */ + std = ni_std(ni); + if (!std) { + err = -EINVAL; + goto out; + } + + /* Update the access times if they have changed. */ + dup.m_time = kernel2nt(&inode->i_mtime); + if (std->m_time != dup.m_time) { + std->m_time = dup.m_time; + modified = true; + } + + dup.c_time = kernel2nt(&inode->i_ctime); + if (std->c_time != dup.c_time) { + std->c_time = dup.c_time; + modified = true; + } + + dup.a_time = kernel2nt(&inode->i_atime); + if (std->a_time != dup.a_time) { + std->a_time = dup.a_time; + modified = true; + } + + dup.fa = ni->std_fa; + if (std->fa != dup.fa) { + std->fa = dup.fa; + modified = true; + } + + if (modified) + ni->mi.dirty = true; + + if (!ntfs_is_meta_file(sbi, inode->i_ino) && + (modified || (ni->ni_flags & NI_FLAG_UPDATE_PARENT))) { + dup.cr_time = std->cr_time; + /* Not critical if this function fail */ + re_dirty = ni_update_parent(ni, &dup, sync); + + if (re_dirty) + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + else + ni->ni_flags &= ~NI_FLAG_UPDATE_PARENT; + } + + /* update attribute list */ + if (ni->attr_list.size && ni->attr_list.dirty) { + if (inode->i_ino != MFT_REC_MFT || sync) { + err = ni_try_remove_attr_list(ni); + if (err) + goto out; + } + + err = al_update(ni); + if (err) + goto out; + } + } + + for (node = rb_first(&ni->mi_tree); node; node = next) { + struct mft_inode *mi = rb_entry(node, struct mft_inode, node); + bool is_empty; + + next = rb_next(node); + + if (!mi->dirty) + continue; + + is_empty = !mi_enum_attr(mi, NULL); + + if (is_empty) + clear_rec_inuse(mi->mrec); + + err2 = mi_write(mi, sync); + if (!err && err2) + err = err2; + + if (is_empty) { + ntfs_mark_rec_free(sbi, mi->rno); + rb_erase(node, &ni->mi_tree); + mi_put(mi); + } + } + + if (ni->mi.dirty) { + err2 = mi_write(&ni->mi, sync); + if (!err && err2) + err = err2; + } +out: + ni_unlock(ni); + + if (err) { + ntfs_err(sb, "%s r=%lx failed, %d.", hint, inode->i_ino, err); + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + return err; + } + + if (re_dirty && (sb->s_flags & SB_ACTIVE)) + mark_inode_dirty_sync(inode); + + return 0; +} diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c new file mode 100644 index 000000000000..fc287a500fda --- /dev/null +++ b/fs/ntfs3/fslog.c @@ -0,0 +1,5208 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * LOG FILE structs + */ + +// clang-format off + +#define MaxLogFileSize 0x100000000ull +#define DefaultLogPageSize 4096 +#define MinLogRecordPages 0x30 + +struct RESTART_HDR { + struct NTFS_RECORD_HEADER rhdr; // 'RSTR' + __le32 sys_page_size; // 0x10: Page size of the system which initialized the log + __le32 page_size; // 0x14: Log page size used for this log file + __le16 ra_off; // 0x18: + __le16 minor_ver; // 0x1A: + __le16 major_ver; // 0x1C: + __le16 fixups[1]; +}; + +#define LFS_NO_CLIENT 0xffff +#define LFS_NO_CLIENT_LE cpu_to_le16(0xffff) + +struct CLIENT_REC { + __le64 oldest_lsn; + __le64 restart_lsn; // 0x08: + __le16 prev_client; // 0x10: + __le16 next_client; // 0x12: + __le16 seq_num; // 0x14: + u8 align[6]; // 0x16 + __le32 name_bytes; // 0x1C: in bytes + __le16 name[32]; // 0x20: name of client +}; + +static_assert(sizeof(struct CLIENT_REC) == 0x60); + +/* Two copies of these will exist at the beginning of the log file */ +struct RESTART_AREA { + __le64 current_lsn; // 0x00: Current logical end of log file + __le16 log_clients; // 0x08: Maximum number of clients + __le16 client_idx[2]; // 0x0A: free/use index into the client record arrays + __le16 flags; // 0x0E: See RESTART_SINGLE_PAGE_IO + __le32 seq_num_bits; // 0x10: the number of bits in sequence number. + __le16 ra_len; // 0x14: + __le16 client_off; // 0x16: + __le64 l_size; // 0x18: Usable log file size. + __le32 last_lsn_data_len; // 0x20: + __le16 rec_hdr_len; // 0x24: log page data offset + __le16 data_off; // 0x26: log page data length + __le32 open_log_count; // 0x28: + __le32 align[5]; // 0x2C: + struct CLIENT_REC clients[1]; // 0x40: +}; + +struct LOG_REC_HDR { + __le16 redo_op; // 0x00: NTFS_LOG_OPERATION + __le16 undo_op; // 0x02: NTFS_LOG_OPERATION + __le16 redo_off; // 0x04: Offset to Redo record + __le16 redo_len; // 0x06: Redo length + __le16 undo_off; // 0x08: Offset to Undo record + __le16 undo_len; // 0x0A: Undo length + __le16 target_attr; // 0x0C: + __le16 lcns_follow; // 0x0E: + __le16 record_off; // 0x10: + __le16 attr_off; // 0x12: + __le16 cluster_off; // 0x14: + __le16 reserved; // 0x16: + __le64 target_vcn; // 0x18: + __le64 page_lcns[1]; // 0x20: +}; + +static_assert(sizeof(struct LOG_REC_HDR) == 0x28); + +#define RESTART_ENTRY_ALLOCATED 0xFFFFFFFF +#define RESTART_ENTRY_ALLOCATED_LE cpu_to_le32(0xFFFFFFFF) + +struct RESTART_TABLE { + __le16 size; // 0x00: In bytes + __le16 used; // 0x02: entries + __le16 total; // 0x04: entries + __le16 res[3]; // 0x06: + __le32 free_goal; // 0x0C: + __le32 first_free; // 0x10 + __le32 last_free; // 0x14 + +}; + +static_assert(sizeof(struct RESTART_TABLE) == 0x18); + +struct ATTR_NAME_ENTRY { + __le16 off; // offset in the Open attribute Table + __le16 name_bytes; + __le16 name[1]; +}; + +struct OPEN_ATTR_ENRTY { + __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated + __le32 bytes_per_index; // 0x04: + enum ATTR_TYPE type; // 0x08: + u8 is_dirty_pages; // 0x0C: + u8 is_attr_name; // 0x0B: Faked field to manage 'ptr' + u8 name_len; // 0x0C: Faked field to manage 'ptr' + u8 res; + struct MFT_REF ref; // 0x10: File Reference of file containing attribute + __le64 open_record_lsn; // 0x18: + void *ptr; // 0x20: +}; + +/* 32 bit version of 'struct OPEN_ATTR_ENRTY' */ +struct OPEN_ATTR_ENRTY_32 { + __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated + __le32 ptr; // 0x04: + struct MFT_REF ref; // 0x08: + __le64 open_record_lsn; // 0x10: + u8 is_dirty_pages; // 0x18: + u8 is_attr_name; // 0x19 + u8 res1[2]; + enum ATTR_TYPE type; // 0x1C: + u8 name_len; // 0x20: in wchar + u8 res2[3]; + __le32 AttributeName; // 0x24: + __le32 bytes_per_index; // 0x28: +}; + +#define SIZEOF_OPENATTRIBUTEENTRY0 0x2c +// static_assert( 0x2C == sizeof(struct OPEN_ATTR_ENRTY_32) ); +static_assert(sizeof(struct OPEN_ATTR_ENRTY) < SIZEOF_OPENATTRIBUTEENTRY0); + +/* + * One entry exists in the Dirty Pages Table for each page which is dirty at the + * time the Restart Area is written + */ +struct DIR_PAGE_ENTRY { + __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated + __le32 target_attr; // 0x04: Index into the Open attribute Table + __le32 transfer_len; // 0x08: + __le32 lcns_follow; // 0x0C: + __le64 vcn; // 0x10: Vcn of dirty page + __le64 oldest_lsn; // 0x18: + __le64 page_lcns[1]; // 0x20: +}; + +static_assert(sizeof(struct DIR_PAGE_ENTRY) == 0x28); + +/* 32 bit version of 'struct DIR_PAGE_ENTRY' */ +struct DIR_PAGE_ENTRY_32 { + __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated + __le32 target_attr; // 0x04: Index into the Open attribute Table + __le32 transfer_len; // 0x08: + __le32 lcns_follow; // 0x0C: + __le32 reserved; // 0x10: + __le32 vcn_low; // 0x14: Vcn of dirty page + __le32 vcn_hi; // 0x18: Vcn of dirty page + __le32 oldest_lsn_low; // 0x1C: + __le32 oldest_lsn_hi; // 0x1C: + __le32 page_lcns_low; // 0x24: + __le32 page_lcns_hi; // 0x24: +}; + +static_assert(offsetof(struct DIR_PAGE_ENTRY_32, vcn_low) == 0x14); +static_assert(sizeof(struct DIR_PAGE_ENTRY_32) == 0x2c); + +enum transact_state { + TransactionUninitialized = 0, + TransactionActive, + TransactionPrepared, + TransactionCommitted +}; + +struct TRANSACTION_ENTRY { + __le32 next; // 0x00: RESTART_ENTRY_ALLOCATED if allocated + u8 transact_state; // 0x04: + u8 reserved[3]; // 0x05: + __le64 first_lsn; // 0x08: + __le64 prev_lsn; // 0x10: + __le64 undo_next_lsn; // 0x18: + __le32 undo_records; // 0x20: Number of undo log records pending abort + __le32 undo_len; // 0x24: Total undo size +}; + +static_assert(sizeof(struct TRANSACTION_ENTRY) == 0x28); + +struct NTFS_RESTART { + __le32 major_ver; // 0x00: + __le32 minor_ver; // 0x04: + __le64 check_point_start; // 0x08: + __le64 open_attr_table_lsn; // 0x10: + __le64 attr_names_lsn; // 0x18: + __le64 dirty_pages_table_lsn; // 0x20: + __le64 transact_table_lsn; // 0x28: + __le32 open_attr_len; // 0x30: In bytes + __le32 attr_names_len; // 0x34: In bytes + __le32 dirty_pages_len; // 0x38: In bytes + __le32 transact_table_len; // 0x3C: In bytes +}; + +static_assert(sizeof(struct NTFS_RESTART) == 0x40); + +struct NEW_ATTRIBUTE_SIZES { + __le64 alloc_size; + __le64 valid_size; + __le64 data_size; + __le64 total_size; +}; + +struct BITMAP_RANGE { + __le32 bitmap_off; + __le32 bits; +}; + +struct LCN_RANGE { + __le64 lcn; + __le64 len; +}; + +/* The following type defines the different log record types */ +#define LfsClientRecord cpu_to_le32(1) +#define LfsClientRestart cpu_to_le32(2) + +/* This is used to uniquely identify a client for a particular log file */ +struct CLIENT_ID { + __le16 seq_num; + __le16 client_idx; +}; + +/* This is the header that begins every Log Record in the log file */ +struct LFS_RECORD_HDR { + __le64 this_lsn; // 0x00: + __le64 client_prev_lsn; // 0x08: + __le64 client_undo_next_lsn; // 0x10: + __le32 client_data_len; // 0x18: + struct CLIENT_ID client; // 0x1C: Owner of this log record + __le32 record_type; // 0x20: LfsClientRecord or LfsClientRestart + __le32 transact_id; // 0x24: + __le16 flags; // 0x28: LOG_RECORD_MULTI_PAGE + u8 align[6]; // 0x2A: +}; + +#define LOG_RECORD_MULTI_PAGE cpu_to_le16(1) + +static_assert(sizeof(struct LFS_RECORD_HDR) == 0x30); + +struct LFS_RECORD { + __le16 next_record_off; // 0x00: Offset of the free space in the page + u8 align[6]; // 0x02: + __le64 last_end_lsn; // 0x08: lsn for the last log record which ends on the page +}; + +static_assert(sizeof(struct LFS_RECORD) == 0x10); + +struct RECORD_PAGE_HDR { + struct NTFS_RECORD_HEADER rhdr; // 'RCRD' + __le32 rflags; // 0x10: See LOG_PAGE_LOG_RECORD_END + __le16 page_count; // 0x14: + __le16 page_pos; // 0x16: + struct LFS_RECORD record_hdr; // 0x18 + __le16 fixups[10]; // 0x28 + __le32 file_off; // 0x3c: used when major version >= 2 +}; + +// clang-format on + +// Page contains the end of a log record +#define LOG_PAGE_LOG_RECORD_END cpu_to_le32(0x00000001) + +static inline bool is_log_record_end(const struct RECORD_PAGE_HDR *hdr) +{ + return hdr->rflags & LOG_PAGE_LOG_RECORD_END; +} + +static_assert(offsetof(struct RECORD_PAGE_HDR, file_off) == 0x3c); + +/* + * END of NTFS LOG structures + */ + +/* Define some tuning parameters to keep the restart tables a reasonable size */ +#define INITIAL_NUMBER_TRANSACTIONS 5 + +enum NTFS_LOG_OPERATION { + + Noop = 0x00, + CompensationLogRecord = 0x01, + InitializeFileRecordSegment = 0x02, + DeallocateFileRecordSegment = 0x03, + WriteEndOfFileRecordSegment = 0x04, + CreateAttribute = 0x05, + DeleteAttribute = 0x06, + UpdateResidentValue = 0x07, + UpdateNonresidentValue = 0x08, + UpdateMappingPairs = 0x09, + DeleteDirtyClusters = 0x0A, + SetNewAttributeSizes = 0x0B, + AddIndexEntryRoot = 0x0C, + DeleteIndexEntryRoot = 0x0D, + AddIndexEntryAllocation = 0x0E, + DeleteIndexEntryAllocation = 0x0F, + WriteEndOfIndexBuffer = 0x10, + SetIndexEntryVcnRoot = 0x11, + SetIndexEntryVcnAllocation = 0x12, + UpdateFileNameRoot = 0x13, + UpdateFileNameAllocation = 0x14, + SetBitsInNonresidentBitMap = 0x15, + ClearBitsInNonresidentBitMap = 0x16, + HotFix = 0x17, + EndTopLevelAction = 0x18, + PrepareTransaction = 0x19, + CommitTransaction = 0x1A, + ForgetTransaction = 0x1B, + OpenNonresidentAttribute = 0x1C, + OpenAttributeTableDump = 0x1D, + AttributeNamesDump = 0x1E, + DirtyPageTableDump = 0x1F, + TransactionTableDump = 0x20, + UpdateRecordDataRoot = 0x21, + UpdateRecordDataAllocation = 0x22, + + UpdateRelativeDataInIndex = + 0x23, // NtOfsRestartUpdateRelativeDataInIndex + UpdateRelativeDataInIndex2 = 0x24, + ZeroEndOfFileRecord = 0x25, +}; + +/* + * Array for log records which require a target attribute + * A true indicates that the corresponding restart operation requires a target attribute + */ +static const u8 AttributeRequired[] = { + 0xFC, 0xFB, 0xFF, 0x10, 0x06, +}; + +static inline bool is_target_required(u16 op) +{ + bool ret = op <= UpdateRecordDataAllocation && + (AttributeRequired[op >> 3] >> (op & 7) & 1); + return ret; +} + +static inline bool can_skip_action(enum NTFS_LOG_OPERATION op) +{ + switch (op) { + case Noop: + case DeleteDirtyClusters: + case HotFix: + case EndTopLevelAction: + case PrepareTransaction: + case CommitTransaction: + case ForgetTransaction: + case CompensationLogRecord: + case OpenNonresidentAttribute: + case OpenAttributeTableDump: + case AttributeNamesDump: + case DirtyPageTableDump: + case TransactionTableDump: + return true; + default: + return false; + } +} + +enum { lcb_ctx_undo_next, lcb_ctx_prev, lcb_ctx_next }; + +/* bytes per restart table */ +static inline u32 bytes_per_rt(const struct RESTART_TABLE *rt) +{ + return le16_to_cpu(rt->used) * le16_to_cpu(rt->size) + + sizeof(struct RESTART_TABLE); +} + +/* log record length */ +static inline u32 lrh_length(const struct LOG_REC_HDR *lr) +{ + u16 t16 = le16_to_cpu(lr->lcns_follow); + + return t16 > 1 ? sizeof(struct LOG_REC_HDR) + (t16 - 1) * sizeof(u64) + : sizeof(struct LOG_REC_HDR); +} + +struct lcb { + struct LFS_RECORD_HDR *lrh; // Log record header of the current lsn + struct LOG_REC_HDR *log_rec; + u32 ctx_mode; // lcb_ctx_undo_next/lcb_ctx_prev/lcb_ctx_next + struct CLIENT_ID client; + bool alloc; // if true the we should deallocate 'log_rec' +}; + +static void lcb_put(struct lcb *lcb) +{ + if (lcb->alloc) + ntfs_free(lcb->log_rec); + ntfs_free(lcb->lrh); + ntfs_free(lcb); +} + +/* + * oldest_client_lsn + * + * find the oldest lsn from active clients. + */ +static inline void oldest_client_lsn(const struct CLIENT_REC *ca, + __le16 next_client, u64 *oldest_lsn) +{ + while (next_client != LFS_NO_CLIENT_LE) { + const struct CLIENT_REC *cr = ca + le16_to_cpu(next_client); + u64 lsn = le64_to_cpu(cr->oldest_lsn); + + /* ignore this block if it's oldest lsn is 0 */ + if (lsn && lsn < *oldest_lsn) + *oldest_lsn = lsn; + + next_client = cr->next_client; + } +} + +static inline bool is_rst_page_hdr_valid(u32 file_off, + const struct RESTART_HDR *rhdr) +{ + u32 sys_page = le32_to_cpu(rhdr->sys_page_size); + u32 page_size = le32_to_cpu(rhdr->page_size); + u32 end_usa; + u16 ro; + + if (sys_page < SECTOR_SIZE || page_size < SECTOR_SIZE || + sys_page & (sys_page - 1) || page_size & (page_size - 1)) { + return false; + } + + /* Check that if the file offset isn't 0, it is the system page size */ + if (file_off && file_off != sys_page) + return false; + + /* Check support version 1.1+ */ + if (le16_to_cpu(rhdr->major_ver) <= 1 && !rhdr->minor_ver) + return false; + + if (le16_to_cpu(rhdr->major_ver) > 2) + return false; + + ro = le16_to_cpu(rhdr->ra_off); + if (!IsQuadAligned(ro) || ro > sys_page) + return false; + + end_usa = ((sys_page >> SECTOR_SHIFT) + 1) * sizeof(short); + end_usa += le16_to_cpu(rhdr->rhdr.fix_off); + + if (ro < end_usa) + return false; + + return true; +} + +static inline bool is_rst_area_valid(const struct RESTART_HDR *rhdr) +{ + const struct RESTART_AREA *ra; + u16 cl, fl, ul; + u32 off, l_size, file_dat_bits, file_size_round; + u16 ro = le16_to_cpu(rhdr->ra_off); + u32 sys_page = le32_to_cpu(rhdr->sys_page_size); + + if (ro + offsetof(struct RESTART_AREA, l_size) > + SECTOR_SIZE - sizeof(short)) + return false; + + ra = Add2Ptr(rhdr, ro); + cl = le16_to_cpu(ra->log_clients); + + if (cl > 1) + return false; + + off = le16_to_cpu(ra->client_off); + + if (!IsQuadAligned(off) || ro + off > SECTOR_SIZE - sizeof(short)) + return false; + + off += cl * sizeof(struct CLIENT_REC); + + if (off > sys_page) + return false; + + /* + * Check the restart length field and whether the entire + * restart area is contained that length + */ + if (le16_to_cpu(rhdr->ra_off) + le16_to_cpu(ra->ra_len) > sys_page || + off > le16_to_cpu(ra->ra_len)) { + return false; + } + + /* + * As a final check make sure that the use list and the free list + * are either empty or point to a valid client + */ + fl = le16_to_cpu(ra->client_idx[0]); + ul = le16_to_cpu(ra->client_idx[1]); + if ((fl != LFS_NO_CLIENT && fl >= cl) || + (ul != LFS_NO_CLIENT && ul >= cl)) + return false; + + /* Make sure the sequence number bits match the log file size */ + l_size = le64_to_cpu(ra->l_size); + + file_dat_bits = sizeof(u64) * 8 - le32_to_cpu(ra->seq_num_bits); + file_size_round = 1u << (file_dat_bits + 3); + if (file_size_round != l_size && + (file_size_round < l_size || (file_size_round / 2) > l_size)) { + return false; + } + + /* The log page data offset and record header length must be quad-aligned */ + if (!IsQuadAligned(le16_to_cpu(ra->data_off)) || + !IsQuadAligned(le16_to_cpu(ra->rec_hdr_len))) + return false; + + return true; +} + +static inline bool is_client_area_valid(const struct RESTART_HDR *rhdr, + bool usa_error) +{ + u16 ro = le16_to_cpu(rhdr->ra_off); + const struct RESTART_AREA *ra = Add2Ptr(rhdr, ro); + u16 ra_len = le16_to_cpu(ra->ra_len); + const struct CLIENT_REC *ca; + u32 i; + + if (usa_error && ra_len + ro > SECTOR_SIZE - sizeof(short)) + return false; + + /* Find the start of the client array */ + ca = Add2Ptr(ra, le16_to_cpu(ra->client_off)); + + /* + * Start with the free list + * Check that all the clients are valid and that there isn't a cycle + * Do the in-use list on the second pass + */ + for (i = 0; i < 2; i++) { + u16 client_idx = le16_to_cpu(ra->client_idx[i]); + bool first_client = true; + u16 clients = le16_to_cpu(ra->log_clients); + + while (client_idx != LFS_NO_CLIENT) { + const struct CLIENT_REC *cr; + + if (!clients || + client_idx >= le16_to_cpu(ra->log_clients)) + return false; + + clients -= 1; + cr = ca + client_idx; + + client_idx = le16_to_cpu(cr->next_client); + + if (first_client) { + first_client = false; + if (cr->prev_client != LFS_NO_CLIENT_LE) + return false; + } + } + } + + return true; +} + +/* + * remove_client + * + * remove a client record from a client record list an restart area + */ +static inline void remove_client(struct CLIENT_REC *ca, + const struct CLIENT_REC *cr, __le16 *head) +{ + if (cr->prev_client == LFS_NO_CLIENT_LE) + *head = cr->next_client; + else + ca[le16_to_cpu(cr->prev_client)].next_client = cr->next_client; + + if (cr->next_client != LFS_NO_CLIENT_LE) + ca[le16_to_cpu(cr->next_client)].prev_client = cr->prev_client; +} + +/* + * add_client + * + * add a client record to the start of a list + */ +static inline void add_client(struct CLIENT_REC *ca, u16 index, __le16 *head) +{ + struct CLIENT_REC *cr = ca + index; + + cr->prev_client = LFS_NO_CLIENT_LE; + cr->next_client = *head; + + if (*head != LFS_NO_CLIENT_LE) + ca[le16_to_cpu(*head)].prev_client = cpu_to_le16(index); + + *head = cpu_to_le16(index); +} + +/* + * enum_rstbl + * + */ +static inline void *enum_rstbl(struct RESTART_TABLE *t, void *c) +{ + __le32 *e; + u32 bprt; + u16 rsize = t ? le16_to_cpu(t->size) : 0; + + if (!c) { + if (!t || !t->total) + return NULL; + e = Add2Ptr(t, sizeof(struct RESTART_TABLE)); + } else { + e = Add2Ptr(c, rsize); + } + + /* Loop until we hit the first one allocated, or the end of the list */ + for (bprt = bytes_per_rt(t); PtrOffset(t, e) < bprt; + e = Add2Ptr(e, rsize)) { + if (*e == RESTART_ENTRY_ALLOCATED_LE) + return e; + } + return NULL; +} + +/* + * find_dp + * + * searches for a 'vcn' in Dirty Page Table, + */ +static inline struct DIR_PAGE_ENTRY *find_dp(struct RESTART_TABLE *dptbl, + u32 target_attr, u64 vcn) +{ + __le32 ta = cpu_to_le32(target_attr); + struct DIR_PAGE_ENTRY *dp = NULL; + + while ((dp = enum_rstbl(dptbl, dp))) { + u64 dp_vcn = le64_to_cpu(dp->vcn); + + if (dp->target_attr == ta && vcn >= dp_vcn && + vcn < dp_vcn + le32_to_cpu(dp->lcns_follow)) { + return dp; + } + } + return NULL; +} + +static inline u32 norm_file_page(u32 page_size, u32 *l_size, bool use_default) +{ + if (use_default) + page_size = DefaultLogPageSize; + + /* Round the file size down to a system page boundary */ + *l_size &= ~(page_size - 1); + + /* File should contain at least 2 restart pages and MinLogRecordPages pages */ + if (*l_size < (MinLogRecordPages + 2) * page_size) + return 0; + + return page_size; +} + +static bool check_log_rec(const struct LOG_REC_HDR *lr, u32 bytes, u32 tr, + u32 bytes_per_attr_entry) +{ + u16 t16; + + if (bytes < sizeof(struct LOG_REC_HDR)) + return false; + if (!tr) + return false; + + if ((tr - sizeof(struct RESTART_TABLE)) % + sizeof(struct TRANSACTION_ENTRY)) + return false; + + if (le16_to_cpu(lr->redo_off) & 7) + return false; + + if (le16_to_cpu(lr->undo_off) & 7) + return false; + + if (lr->target_attr) + goto check_lcns; + + if (is_target_required(le16_to_cpu(lr->redo_op))) + return false; + + if (is_target_required(le16_to_cpu(lr->undo_op))) + return false; + +check_lcns: + if (!lr->lcns_follow) + goto check_length; + + t16 = le16_to_cpu(lr->target_attr); + if ((t16 - sizeof(struct RESTART_TABLE)) % bytes_per_attr_entry) + return false; + +check_length: + if (bytes < lrh_length(lr)) + return false; + + return true; +} + +static bool check_rstbl(const struct RESTART_TABLE *rt, size_t bytes) +{ + u32 ts; + u32 i, off; + u16 rsize = le16_to_cpu(rt->size); + u16 ne = le16_to_cpu(rt->used); + u32 ff = le32_to_cpu(rt->first_free); + u32 lf = le32_to_cpu(rt->last_free); + + ts = rsize * ne + sizeof(struct RESTART_TABLE); + + if (!rsize || rsize > bytes || + rsize + sizeof(struct RESTART_TABLE) > bytes || bytes < ts || + le16_to_cpu(rt->total) > ne || ff > ts || lf > ts || + (ff && ff < sizeof(struct RESTART_TABLE)) || + (lf && lf < sizeof(struct RESTART_TABLE))) { + return false; + } + + /* Verify each entry is either allocated or points + * to a valid offset the table + */ + for (i = 0; i < ne; i++) { + off = le32_to_cpu(*(__le32 *)Add2Ptr( + rt, i * rsize + sizeof(struct RESTART_TABLE))); + + if (off != RESTART_ENTRY_ALLOCATED && off && + (off < sizeof(struct RESTART_TABLE) || + ((off - sizeof(struct RESTART_TABLE)) % rsize))) { + return false; + } + } + + /* Walk through the list headed by the first entry to make + * sure none of the entries are currently being used + */ + for (off = ff; off;) { + if (off == RESTART_ENTRY_ALLOCATED) + return false; + + off = le32_to_cpu(*(__le32 *)Add2Ptr(rt, off)); + } + + return true; +} + +/* + * free_rsttbl_idx + * + * frees a previously allocated index a Restart Table. + */ +static inline void free_rsttbl_idx(struct RESTART_TABLE *rt, u32 off) +{ + __le32 *e; + u32 lf = le32_to_cpu(rt->last_free); + __le32 off_le = cpu_to_le32(off); + + e = Add2Ptr(rt, off); + + if (off < le32_to_cpu(rt->free_goal)) { + *e = rt->first_free; + rt->first_free = off_le; + if (!lf) + rt->last_free = off_le; + } else { + if (lf) + *(__le32 *)Add2Ptr(rt, lf) = off_le; + else + rt->first_free = off_le; + + rt->last_free = off_le; + *e = 0; + } + + le16_sub_cpu(&rt->total, 1); +} + +static inline struct RESTART_TABLE *init_rsttbl(u16 esize, u16 used) +{ + __le32 *e, *last_free; + u32 off; + u32 bytes = esize * used + sizeof(struct RESTART_TABLE); + u32 lf = sizeof(struct RESTART_TABLE) + (used - 1) * esize; + struct RESTART_TABLE *t = ntfs_zalloc(bytes); + + t->size = cpu_to_le16(esize); + t->used = cpu_to_le16(used); + t->free_goal = cpu_to_le32(~0u); + t->first_free = cpu_to_le32(sizeof(struct RESTART_TABLE)); + t->last_free = cpu_to_le32(lf); + + e = (__le32 *)(t + 1); + last_free = Add2Ptr(t, lf); + + for (off = sizeof(struct RESTART_TABLE) + esize; e < last_free; + e = Add2Ptr(e, esize), off += esize) { + *e = cpu_to_le32(off); + } + return t; +} + +static inline struct RESTART_TABLE *extend_rsttbl(struct RESTART_TABLE *tbl, + u32 add, u32 free_goal) +{ + u16 esize = le16_to_cpu(tbl->size); + __le32 osize = cpu_to_le32(bytes_per_rt(tbl)); + u32 used = le16_to_cpu(tbl->used); + struct RESTART_TABLE *rt = init_rsttbl(esize, used + add); + + memcpy(rt + 1, tbl + 1, esize * used); + + rt->free_goal = free_goal == ~0u + ? cpu_to_le32(~0u) + : cpu_to_le32(sizeof(struct RESTART_TABLE) + + free_goal * esize); + + if (tbl->first_free) { + rt->first_free = tbl->first_free; + *(__le32 *)Add2Ptr(rt, le32_to_cpu(tbl->last_free)) = osize; + } else { + rt->first_free = osize; + } + + rt->total = tbl->total; + + ntfs_free(tbl); + return rt; +} + +/* + * alloc_rsttbl_idx + * + * allocates an index from within a previously initialized Restart Table + */ +static inline void *alloc_rsttbl_idx(struct RESTART_TABLE **tbl) +{ + u32 off; + __le32 *e; + struct RESTART_TABLE *t = *tbl; + + if (!t->first_free) + *tbl = t = extend_rsttbl(t, 16, ~0u); + + off = le32_to_cpu(t->first_free); + + /* Dequeue this entry and zero it. */ + e = Add2Ptr(t, off); + + t->first_free = *e; + + memset(e, 0, le16_to_cpu(t->size)); + + *e = RESTART_ENTRY_ALLOCATED_LE; + + /* If list is going empty, then we fix the last_free as well. */ + if (!t->first_free) + t->last_free = 0; + + le16_add_cpu(&t->total, 1); + + return Add2Ptr(t, off); +} + +/* + * alloc_rsttbl_from_idx + * + * allocates a specific index from within a previously initialized Restart Table + */ +static inline void *alloc_rsttbl_from_idx(struct RESTART_TABLE **tbl, u32 vbo) +{ + u32 off; + __le32 *e; + struct RESTART_TABLE *rt = *tbl; + u32 bytes = bytes_per_rt(rt); + u16 esize = le16_to_cpu(rt->size); + + /* If the entry is not the table, we will have to extend the table */ + if (vbo >= bytes) { + /* + * extend the size by computing the number of entries between + * the existing size and the desired index and adding + * 1 to that + */ + u32 bytes2idx = vbo - bytes; + + /* There should always be an integral number of entries being added */ + /* Now extend the table */ + *tbl = rt = extend_rsttbl(rt, bytes2idx / esize + 1, bytes); + if (!rt) + return NULL; + } + + /* see if the entry is already allocated, and just return if it is. */ + e = Add2Ptr(rt, vbo); + + if (*e == RESTART_ENTRY_ALLOCATED_LE) + return e; + + /* + * Walk through the table, looking for the entry we're + * interested and the previous entry + */ + off = le32_to_cpu(rt->first_free); + e = Add2Ptr(rt, off); + + if (off == vbo) { + /* this is a match */ + rt->first_free = *e; + goto skip_looking; + } + + /* + * need to walk through the list looking for the predecessor of our entry + */ + for (;;) { + /* Remember the entry just found */ + u32 last_off = off; + __le32 *last_e = e; + + /* should never run of entries. */ + + /* Lookup up the next entry the list */ + off = le32_to_cpu(*last_e); + e = Add2Ptr(rt, off); + + /* If this is our match we are done */ + if (off == vbo) { + *last_e = *e; + + /* If this was the last entry, we update that the table as well */ + if (le32_to_cpu(rt->last_free) == off) + rt->last_free = cpu_to_le32(last_off); + break; + } + } + +skip_looking: + /* If the list is now empty, we fix the last_free as well */ + if (!rt->first_free) + rt->last_free = 0; + + /* Zero this entry */ + memset(e, 0, esize); + *e = RESTART_ENTRY_ALLOCATED_LE; + + le16_add_cpu(&rt->total, 1); + + return e; +} + +#define RESTART_SINGLE_PAGE_IO cpu_to_le16(0x0001) + +#define NTFSLOG_WRAPPED 0x00000001 +#define NTFSLOG_MULTIPLE_PAGE_IO 0x00000002 +#define NTFSLOG_NO_LAST_LSN 0x00000004 +#define NTFSLOG_REUSE_TAIL 0x00000010 +#define NTFSLOG_NO_OLDEST_LSN 0x00000020 + +/* + * Helper struct to work with NTFS LogFile + */ +struct ntfs_log { + struct ntfs_inode *ni; + + u32 l_size; + u32 sys_page_size; + u32 sys_page_mask; + u32 page_size; + u32 page_mask; // page_size - 1 + u8 page_bits; + struct RECORD_PAGE_HDR *one_page_buf; + + struct RESTART_TABLE *open_attr_tbl; + u32 transaction_id; + u32 clst_per_page; + + u32 first_page; + u32 next_page; + u32 ra_off; + u32 data_off; + u32 restart_size; + u32 data_size; + u16 record_header_len; + u64 seq_num; + u32 seq_num_bits; + u32 file_data_bits; + u32 seq_num_mask; /* (1 << file_data_bits) - 1 */ + + struct RESTART_AREA *ra; /* in-memory image of the next restart area */ + u32 ra_size; /* the usable size of the restart area */ + + /* + * If true, then the in-memory restart area is to be written + * to the first position on the disk + */ + bool init_ra; + bool set_dirty; /* true if we need to set dirty flag */ + + u64 oldest_lsn; + + u32 oldest_lsn_off; + u64 last_lsn; + + u32 total_avail; + u32 total_avail_pages; + u32 total_undo_commit; + u32 max_current_avail; + u32 current_avail; + u32 reserved; + + short major_ver; + short minor_ver; + + u32 l_flags; /* See NTFSLOG_XXX */ + u32 current_openlog_count; /* On-disk value for open_log_count */ + + struct CLIENT_ID client_id; + u32 client_undo_commit; +}; + +static inline u32 lsn_to_vbo(struct ntfs_log *log, const u64 lsn) +{ + u32 vbo = (lsn << log->seq_num_bits) >> (log->seq_num_bits - 3); + + return vbo; +} + +/* compute the offset in the log file of the next log page */ +static inline u32 next_page_off(struct ntfs_log *log, u32 off) +{ + off = (off & ~log->sys_page_mask) + log->page_size; + return off >= log->l_size ? log->first_page : off; +} + +static inline u32 lsn_to_page_off(struct ntfs_log *log, u64 lsn) +{ + return (((u32)lsn) << 3) & log->page_mask; +} + +static inline u64 vbo_to_lsn(struct ntfs_log *log, u32 off, u64 Seq) +{ + return (off >> 3) + (Seq << log->file_data_bits); +} + +static inline bool is_lsn_in_file(struct ntfs_log *log, u64 lsn) +{ + return lsn >= log->oldest_lsn && + lsn <= le64_to_cpu(log->ra->current_lsn); +} + +static inline u32 hdr_file_off(struct ntfs_log *log, + struct RECORD_PAGE_HDR *hdr) +{ + if (log->major_ver < 2) + return le64_to_cpu(hdr->rhdr.lsn); + + return le32_to_cpu(hdr->file_off); +} + +static inline u64 base_lsn(struct ntfs_log *log, + const struct RECORD_PAGE_HDR *hdr, u64 lsn) +{ + u64 h_lsn = le64_to_cpu(hdr->rhdr.lsn); + u64 ret = (((h_lsn >> log->file_data_bits) + + (lsn < (lsn_to_vbo(log, h_lsn) & ~log->page_mask) ? 1 : 0)) + << log->file_data_bits) + + ((((is_log_record_end(hdr) && + h_lsn <= le64_to_cpu(hdr->record_hdr.last_end_lsn)) + ? le16_to_cpu(hdr->record_hdr.next_record_off) + : log->page_size) + + lsn) >> + 3); + + return ret; +} + +static inline bool verify_client_lsn(struct ntfs_log *log, + const struct CLIENT_REC *client, u64 lsn) +{ + return lsn >= le64_to_cpu(client->oldest_lsn) && + lsn <= le64_to_cpu(log->ra->current_lsn) && lsn; +} + +struct restart_info { + u64 last_lsn; + struct RESTART_HDR *r_page; + u32 vbo; + bool chkdsk_was_run; + bool valid_page; + bool initialized; + bool restart; +}; + +static int read_log_page(struct ntfs_log *log, u32 vbo, + struct RECORD_PAGE_HDR **buffer, bool *usa_error) +{ + int err = 0; + u32 page_idx = vbo >> log->page_bits; + u32 page_off = vbo & log->page_mask; + u32 bytes = log->page_size - page_off; + void *to_free = NULL; + u32 page_vbo = page_idx << log->page_bits; + struct RECORD_PAGE_HDR *page_buf; + struct ntfs_inode *ni = log->ni; + bool bBAAD; + + if (vbo >= log->l_size) + return -EINVAL; + + if (!*buffer) { + to_free = ntfs_malloc(bytes); + if (!to_free) + return -ENOMEM; + *buffer = to_free; + } + + page_buf = page_off ? log->one_page_buf : *buffer; + + err = ntfs_read_run_nb(ni->mi.sbi, &ni->file.run, page_vbo, page_buf, + log->page_size, NULL); + if (err) + goto out; + + if (page_buf->rhdr.sign != NTFS_FFFF_SIGNATURE) + ntfs_fix_post_read(&page_buf->rhdr, PAGE_SIZE, false); + + if (page_buf != *buffer) + memcpy(*buffer, Add2Ptr(page_buf, page_off), bytes); + + bBAAD = page_buf->rhdr.sign == NTFS_BAAD_SIGNATURE; + + if (usa_error) + *usa_error = bBAAD; + /* Check that the update sequence array for this page is valid */ + /* If we don't allow errors, raise an error status */ + else if (bBAAD) + err = -EINVAL; + +out: + if (err && to_free) { + ntfs_free(to_free); + *buffer = NULL; + } + + return err; +} + +/* + * log_read_rst + * + * it walks through 512 blocks of the file looking for a valid restart page header + * It will stop the first time we find a valid page header + */ +static int log_read_rst(struct ntfs_log *log, u32 l_size, bool first, + struct restart_info *info) +{ + u32 skip, vbo; + struct RESTART_HDR *r_page = ntfs_malloc(DefaultLogPageSize); + + if (!r_page) + return -ENOMEM; + + memset(info, 0, sizeof(struct restart_info)); + + /* Determine which restart area we are looking for */ + if (first) { + vbo = 0; + skip = 512; + } else { + vbo = 512; + skip = 0; + } + + /* loop continuously until we succeed */ + for (; vbo < l_size; vbo = 2 * vbo + skip, skip = 0) { + bool usa_error; + u32 sys_page_size; + bool brst, bchk; + struct RESTART_AREA *ra; + + /* Read a page header at the current offset */ + if (read_log_page(log, vbo, (struct RECORD_PAGE_HDR **)&r_page, + &usa_error)) { + /* ignore any errors */ + continue; + } + + /* exit if the signature is a log record page */ + if (r_page->rhdr.sign == NTFS_RCRD_SIGNATURE) { + info->initialized = true; + break; + } + + brst = r_page->rhdr.sign == NTFS_RSTR_SIGNATURE; + bchk = r_page->rhdr.sign == NTFS_CHKD_SIGNATURE; + + if (!bchk && !brst) { + if (r_page->rhdr.sign != NTFS_FFFF_SIGNATURE) { + /* + * Remember if the signature does not + * indicate uninitialized file + */ + info->initialized = true; + } + continue; + } + + ra = NULL; + info->valid_page = false; + info->initialized = true; + info->vbo = vbo; + + /* Let's check the restart area if this is a valid page */ + if (!is_rst_page_hdr_valid(vbo, r_page)) + goto check_result; + ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off)); + + if (!is_rst_area_valid(r_page)) + goto check_result; + + /* + * We have a valid restart page header and restart area. + * If chkdsk was run or we have no clients then we have + * no more checking to do + */ + if (bchk || ra->client_idx[1] == LFS_NO_CLIENT_LE) { + info->valid_page = true; + goto check_result; + } + + /* Read the entire restart area */ + sys_page_size = le32_to_cpu(r_page->sys_page_size); + if (DefaultLogPageSize != sys_page_size) { + ntfs_free(r_page); + r_page = ntfs_zalloc(sys_page_size); + if (!r_page) + return -ENOMEM; + + if (read_log_page(log, vbo, + (struct RECORD_PAGE_HDR **)&r_page, + &usa_error)) { + /* ignore any errors */ + ntfs_free(r_page); + r_page = NULL; + continue; + } + } + + if (is_client_area_valid(r_page, usa_error)) { + info->valid_page = true; + ra = Add2Ptr(r_page, le16_to_cpu(r_page->ra_off)); + } + +check_result: + /* If chkdsk was run then update the caller's values and return */ + if (r_page->rhdr.sign == NTFS_CHKD_SIGNATURE) { + info->chkdsk_was_run = true; + info->last_lsn = le64_to_cpu(r_page->rhdr.lsn); + info->restart = true; + info->r_page = r_page; + return 0; + } + + /* If we have a valid page then copy the values we need from it */ + if (info->valid_page) { + info->last_lsn = le64_to_cpu(ra->current_lsn); + info->restart = true; + info->r_page = r_page; + return 0; + } + } + + ntfs_free(r_page); + + return 0; +} + +/* + * log_init_pg_hdr + * + * init "log' from restart page header + */ +static void log_init_pg_hdr(struct ntfs_log *log, u32 sys_page_size, + u32 page_size, u16 major_ver, u16 minor_ver) +{ + log->sys_page_size = sys_page_size; + log->sys_page_mask = sys_page_size - 1; + log->page_size = page_size; + log->page_mask = page_size - 1; + log->page_bits = blksize_bits(page_size); + + log->clst_per_page = log->page_size >> log->ni->mi.sbi->cluster_bits; + if (!log->clst_per_page) + log->clst_per_page = 1; + + log->first_page = major_ver >= 2 + ? 0x22 * page_size + : ((sys_page_size << 1) + (page_size << 1)); + log->major_ver = major_ver; + log->minor_ver = minor_ver; +} + +/* + * log_create + * + * init "log" in cases when we don't have a restart area to use + */ +static void log_create(struct ntfs_log *log, u32 l_size, const u64 last_lsn, + u32 open_log_count, bool wrapped, bool use_multi_page) +{ + log->l_size = l_size; + /* All file offsets must be quadword aligned */ + log->file_data_bits = blksize_bits(l_size) - 3; + log->seq_num_mask = (8 << log->file_data_bits) - 1; + log->seq_num_bits = sizeof(u64) * 8 - log->file_data_bits; + log->seq_num = (last_lsn >> log->file_data_bits) + 2; + log->next_page = log->first_page; + log->oldest_lsn = log->seq_num << log->file_data_bits; + log->oldest_lsn_off = 0; + log->last_lsn = log->oldest_lsn; + + log->l_flags |= NTFSLOG_NO_LAST_LSN | NTFSLOG_NO_OLDEST_LSN; + + /* Set the correct flags for the I/O and indicate if we have wrapped */ + if (wrapped) + log->l_flags |= NTFSLOG_WRAPPED; + + if (use_multi_page) + log->l_flags |= NTFSLOG_MULTIPLE_PAGE_IO; + + /* Compute the log page values */ + log->data_off = QuadAlign( + offsetof(struct RECORD_PAGE_HDR, fixups) + + sizeof(short) * ((log->page_size >> SECTOR_SHIFT) + 1)); + log->data_size = log->page_size - log->data_off; + log->record_header_len = sizeof(struct LFS_RECORD_HDR); + + /* Remember the different page sizes for reservation */ + log->reserved = log->data_size - log->record_header_len; + + /* Compute the restart page values. */ + log->ra_off = QuadAlign( + offsetof(struct RESTART_HDR, fixups) + + sizeof(short) * ((log->sys_page_size >> SECTOR_SHIFT) + 1)); + log->restart_size = log->sys_page_size - log->ra_off; + log->ra_size = offsetof(struct RESTART_AREA, clients) + + sizeof(struct CLIENT_REC); + log->current_openlog_count = open_log_count; + + /* + * The total available log file space is the number of + * log file pages times the space available on each page + */ + log->total_avail_pages = log->l_size - log->first_page; + log->total_avail = log->total_avail_pages >> log->page_bits; + + /* + * We assume that we can't use the end of the page less than + * the file record size + * Then we won't need to reserve more than the caller asks for + */ + log->max_current_avail = log->total_avail * log->reserved; + log->total_avail = log->total_avail * log->data_size; + log->current_avail = log->max_current_avail; +} + +/* + * log_create_ra + * + * This routine is called to fill a restart area from the values stored in 'log' + */ +static struct RESTART_AREA *log_create_ra(struct ntfs_log *log) +{ + struct CLIENT_REC *cr; + struct RESTART_AREA *ra = ntfs_zalloc(log->restart_size); + + if (!ra) + return NULL; + + ra->current_lsn = cpu_to_le64(log->last_lsn); + ra->log_clients = cpu_to_le16(1); + ra->client_idx[1] = LFS_NO_CLIENT_LE; + if (log->l_flags & NTFSLOG_MULTIPLE_PAGE_IO) + ra->flags = RESTART_SINGLE_PAGE_IO; + ra->seq_num_bits = cpu_to_le32(log->seq_num_bits); + ra->ra_len = cpu_to_le16(log->ra_size); + ra->client_off = cpu_to_le16(offsetof(struct RESTART_AREA, clients)); + ra->l_size = cpu_to_le64(log->l_size); + ra->rec_hdr_len = cpu_to_le16(log->record_header_len); + ra->data_off = cpu_to_le16(log->data_off); + ra->open_log_count = cpu_to_le32(log->current_openlog_count + 1); + + cr = ra->clients; + + cr->prev_client = LFS_NO_CLIENT_LE; + cr->next_client = LFS_NO_CLIENT_LE; + + return ra; +} + +static u32 final_log_off(struct ntfs_log *log, u64 lsn, u32 data_len) +{ + u32 base_vbo = lsn << 3; + u32 final_log_off = (base_vbo & log->seq_num_mask) & ~log->page_mask; + u32 page_off = base_vbo & log->page_mask; + u32 tail = log->page_size - page_off; + + page_off -= 1; + + /* Add the length of the header */ + data_len += log->record_header_len; + + /* + * If this lsn is contained this log page we are done + * Otherwise we need to walk through several log pages + */ + if (data_len > tail) { + data_len -= tail; + tail = log->data_size; + page_off = log->data_off - 1; + + for (;;) { + final_log_off = next_page_off(log, final_log_off); + + /* We are done if the remaining bytes fit on this page */ + if (data_len <= tail) + break; + data_len -= tail; + } + } + + /* + * We add the remaining bytes to our starting position on this page + * and then add that value to the file offset of this log page + */ + return final_log_off + data_len + page_off; +} + +static int next_log_lsn(struct ntfs_log *log, const struct LFS_RECORD_HDR *rh, + u64 *lsn) +{ + int err; + u64 this_lsn = le64_to_cpu(rh->this_lsn); + u32 vbo = lsn_to_vbo(log, this_lsn); + u32 end = + final_log_off(log, this_lsn, le32_to_cpu(rh->client_data_len)); + u32 hdr_off = end & ~log->sys_page_mask; + u64 seq = this_lsn >> log->file_data_bits; + struct RECORD_PAGE_HDR *page = NULL; + + /* Remember if we wrapped */ + if (end <= vbo) + seq += 1; + + /* log page header for this page */ + err = read_log_page(log, hdr_off, &page, NULL); + if (err) + return err; + + /* + * If the lsn we were given was not the last lsn on this page, + * then the starting offset for the next lsn is on a quad word + * boundary following the last file offset for the current lsn + * Otherwise the file offset is the start of the data on the next page + */ + if (this_lsn == le64_to_cpu(page->rhdr.lsn)) { + /* If we wrapped, we need to increment the sequence number */ + hdr_off = next_page_off(log, hdr_off); + if (hdr_off == log->first_page) + seq += 1; + + vbo = hdr_off + log->data_off; + } else { + vbo = QuadAlign(end); + } + + /* Compute the lsn based on the file offset and the sequence count */ + *lsn = vbo_to_lsn(log, vbo, seq); + + /* + * If this lsn is within the legal range for the file, we return true + * Otherwise false indicates that there are no more lsn's + */ + if (!is_lsn_in_file(log, *lsn)) + *lsn = 0; + + ntfs_free(page); + + return 0; +} + +/* + * current_log_avail + * + * calculate the number of bytes available for log records + */ +static u32 current_log_avail(struct ntfs_log *log) +{ + u32 oldest_off, next_free_off, free_bytes; + + if (log->l_flags & NTFSLOG_NO_LAST_LSN) { + /* The entire file is available */ + return log->max_current_avail; + } + + /* + * If there is a last lsn the restart area then we know that we will + * have to compute the free range + * If there is no oldest lsn then start at the first page of the file + */ + oldest_off = (log->l_flags & NTFSLOG_NO_OLDEST_LSN) + ? log->first_page + : (log->oldest_lsn_off & ~log->sys_page_mask); + + /* + * We will use the next log page offset to compute the next free page\ + * If we are going to reuse this page go to the next page + * If we are at the first page then use the end of the file + */ + next_free_off = (log->l_flags & NTFSLOG_REUSE_TAIL) + ? log->next_page + log->page_size + : log->next_page == log->first_page ? log->l_size + : log->next_page; + + /* If the two offsets are the same then there is no available space */ + if (oldest_off == next_free_off) + return 0; + /* + * If the free offset follows the oldest offset then subtract + * this range from the total available pages + */ + free_bytes = + oldest_off < next_free_off + ? log->total_avail_pages - (next_free_off - oldest_off) + : oldest_off - next_free_off; + + free_bytes >>= log->page_bits; + return free_bytes * log->reserved; +} + +static bool check_subseq_log_page(struct ntfs_log *log, + const struct RECORD_PAGE_HDR *rp, u32 vbo, + u64 seq) +{ + u64 lsn_seq; + const struct NTFS_RECORD_HEADER *rhdr = &rp->rhdr; + u64 lsn = le64_to_cpu(rhdr->lsn); + + if (rhdr->sign == NTFS_FFFF_SIGNATURE || !rhdr->sign) + return false; + + /* + * If the last lsn on the page occurs was written after the page + * that caused the original error then we have a fatal error + */ + lsn_seq = lsn >> log->file_data_bits; + + /* + * If the sequence number for the lsn the page is equal or greater + * than lsn we expect, then this is a subsequent write + */ + return lsn_seq >= seq || + (lsn_seq == seq - 1 && log->first_page == vbo && + vbo != (lsn_to_vbo(log, lsn) & ~log->page_mask)); +} + +/* + * last_log_lsn + * + * This routine walks through the log pages for a file, searching for the + * last log page written to the file + */ +static int last_log_lsn(struct ntfs_log *log) +{ + int err; + bool usa_error = false; + bool replace_page = false; + bool reuse_page = log->l_flags & NTFSLOG_REUSE_TAIL; + bool wrapped_file, wrapped; + + u32 page_cnt = 1, page_pos = 1; + u32 page_off = 0, page_off1 = 0, saved_off = 0; + u32 final_off, second_off, final_off_prev = 0, second_off_prev = 0; + u32 first_file_off = 0, second_file_off = 0; + u32 part_io_count = 0; + u32 tails = 0; + u32 this_off, curpage_off, nextpage_off, remain_pages; + + u64 expected_seq, seq_base = 0, lsn_base = 0; + u64 best_lsn, best_lsn1, best_lsn2; + u64 lsn_cur, lsn1, lsn2; + u64 last_ok_lsn = reuse_page ? log->last_lsn : 0; + + u16 cur_pos, best_page_pos; + + struct RECORD_PAGE_HDR *page = NULL; + struct RECORD_PAGE_HDR *tst_page = NULL; + struct RECORD_PAGE_HDR *first_tail = NULL; + struct RECORD_PAGE_HDR *second_tail = NULL; + struct RECORD_PAGE_HDR *tail_page = NULL; + struct RECORD_PAGE_HDR *second_tail_prev = NULL; + struct RECORD_PAGE_HDR *first_tail_prev = NULL; + struct RECORD_PAGE_HDR *page_bufs = NULL; + struct RECORD_PAGE_HDR *best_page; + + if (log->major_ver >= 2) { + final_off = 0x02 * log->page_size; + second_off = 0x12 * log->page_size; + + // 0x10 == 0x12 - 0x2 + page_bufs = ntfs_malloc(log->page_size * 0x10); + if (!page_bufs) + return -ENOMEM; + } else { + second_off = log->first_page - log->page_size; + final_off = second_off - log->page_size; + } + +next_tail: + /* Read second tail page (at pos 3/0x12000) */ + if (read_log_page(log, second_off, &second_tail, &usa_error) || + usa_error || second_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) { + ntfs_free(second_tail); + second_tail = NULL; + second_file_off = 0; + lsn2 = 0; + } else { + second_file_off = hdr_file_off(log, second_tail); + lsn2 = le64_to_cpu(second_tail->record_hdr.last_end_lsn); + } + + /* Read first tail page (at pos 2/0x2000 ) */ + if (read_log_page(log, final_off, &first_tail, &usa_error) || + usa_error || first_tail->rhdr.sign != NTFS_RCRD_SIGNATURE) { + ntfs_free(first_tail); + first_tail = NULL; + first_file_off = 0; + lsn1 = 0; + } else { + first_file_off = hdr_file_off(log, first_tail); + lsn1 = le64_to_cpu(first_tail->record_hdr.last_end_lsn); + } + + if (log->major_ver < 2) { + int best_page; + + first_tail_prev = first_tail; + final_off_prev = first_file_off; + second_tail_prev = second_tail; + second_off_prev = second_file_off; + tails = 1; + + if (!first_tail && !second_tail) + goto tail_read; + + if (first_tail && second_tail) + best_page = lsn1 < lsn2 ? 1 : 0; + else if (first_tail) + best_page = 0; + else + best_page = 1; + + page_off = best_page ? second_file_off : first_file_off; + seq_base = (best_page ? lsn2 : lsn1) >> log->file_data_bits; + goto tail_read; + } + + best_lsn1 = first_tail ? base_lsn(log, first_tail, first_file_off) : 0; + best_lsn2 = + second_tail ? base_lsn(log, second_tail, second_file_off) : 0; + + if (first_tail && second_tail) { + if (best_lsn1 > best_lsn2) { + best_lsn = best_lsn1; + best_page = first_tail; + this_off = first_file_off; + } else { + best_lsn = best_lsn2; + best_page = second_tail; + this_off = second_file_off; + } + } else if (first_tail) { + best_lsn = best_lsn1; + best_page = first_tail; + this_off = first_file_off; + } else if (second_tail) { + best_lsn = best_lsn2; + best_page = second_tail; + this_off = second_file_off; + } else { + goto tail_read; + } + + best_page_pos = le16_to_cpu(best_page->page_pos); + + if (!tails) { + if (best_page_pos == page_pos) { + seq_base = best_lsn >> log->file_data_bits; + saved_off = page_off = le32_to_cpu(best_page->file_off); + lsn_base = best_lsn; + + memmove(page_bufs, best_page, log->page_size); + + page_cnt = le16_to_cpu(best_page->page_count); + if (page_cnt > 1) + page_pos += 1; + + tails = 1; + } + } else if (seq_base == (best_lsn >> log->file_data_bits) && + saved_off + log->page_size == this_off && + lsn_base < best_lsn && + (page_pos != page_cnt || best_page_pos == page_pos || + best_page_pos == 1) && + (page_pos >= page_cnt || best_page_pos == page_pos)) { + u16 bppc = le16_to_cpu(best_page->page_count); + + saved_off += log->page_size; + lsn_base = best_lsn; + + memmove(Add2Ptr(page_bufs, tails * log->page_size), best_page, + log->page_size); + + tails += 1; + + if (best_page_pos != bppc) { + page_cnt = bppc; + page_pos = best_page_pos; + + if (page_cnt > 1) + page_pos += 1; + } else { + page_pos = page_cnt = 1; + } + } else { + ntfs_free(first_tail); + ntfs_free(second_tail); + goto tail_read; + } + + ntfs_free(first_tail_prev); + first_tail_prev = first_tail; + final_off_prev = first_file_off; + first_tail = NULL; + + ntfs_free(second_tail_prev); + second_tail_prev = second_tail; + second_off_prev = second_file_off; + second_tail = NULL; + + final_off += log->page_size; + second_off += log->page_size; + + if (tails < 0x10) + goto next_tail; +tail_read: + first_tail = first_tail_prev; + final_off = final_off_prev; + + second_tail = second_tail_prev; + second_off = second_off_prev; + + page_cnt = page_pos = 1; + + curpage_off = seq_base == log->seq_num ? min(log->next_page, page_off) + : log->next_page; + + wrapped_file = + curpage_off == log->first_page && + !(log->l_flags & (NTFSLOG_NO_LAST_LSN | NTFSLOG_REUSE_TAIL)); + + expected_seq = wrapped_file ? (log->seq_num + 1) : log->seq_num; + + nextpage_off = curpage_off; + +next_page: + tail_page = NULL; + /* Read the next log page */ + err = read_log_page(log, curpage_off, &page, &usa_error); + + /* Compute the next log page offset the file */ + nextpage_off = next_page_off(log, curpage_off); + wrapped = nextpage_off == log->first_page; + + if (tails > 1) { + struct RECORD_PAGE_HDR *cur_page = + Add2Ptr(page_bufs, curpage_off - page_off); + + if (curpage_off == saved_off) { + tail_page = cur_page; + goto use_tail_page; + } + + if (page_off > curpage_off || curpage_off >= saved_off) + goto use_tail_page; + + if (page_off1) + goto use_cur_page; + + if (!err && !usa_error && + page->rhdr.sign == NTFS_RCRD_SIGNATURE && + cur_page->rhdr.lsn == page->rhdr.lsn && + cur_page->record_hdr.next_record_off == + page->record_hdr.next_record_off && + ((page_pos == page_cnt && + le16_to_cpu(page->page_pos) == 1) || + (page_pos != page_cnt && + le16_to_cpu(page->page_pos) == page_pos + 1 && + le16_to_cpu(page->page_count) == page_cnt))) { + cur_page = NULL; + goto use_tail_page; + } + + page_off1 = page_off; + +use_cur_page: + + lsn_cur = le64_to_cpu(cur_page->rhdr.lsn); + + if (last_ok_lsn != + le64_to_cpu(cur_page->record_hdr.last_end_lsn) && + ((lsn_cur >> log->file_data_bits) + + ((curpage_off < + (lsn_to_vbo(log, lsn_cur) & ~log->page_mask)) + ? 1 + : 0)) != expected_seq) { + goto check_tail; + } + + if (!is_log_record_end(cur_page)) { + tail_page = NULL; + last_ok_lsn = lsn_cur; + goto next_page_1; + } + + log->seq_num = expected_seq; + log->l_flags &= ~NTFSLOG_NO_LAST_LSN; + log->last_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn); + log->ra->current_lsn = cur_page->record_hdr.last_end_lsn; + + if (log->record_header_len <= + log->page_size - + le16_to_cpu(cur_page->record_hdr.next_record_off)) { + log->l_flags |= NTFSLOG_REUSE_TAIL; + log->next_page = curpage_off; + } else { + log->l_flags &= ~NTFSLOG_REUSE_TAIL; + log->next_page = nextpage_off; + } + + if (wrapped_file) + log->l_flags |= NTFSLOG_WRAPPED; + + last_ok_lsn = le64_to_cpu(cur_page->record_hdr.last_end_lsn); + goto next_page_1; + } + + /* + * If we are at the expected first page of a transfer check to see + * if either tail copy is at this offset + * If this page is the last page of a transfer, check if we wrote + * a subsequent tail copy + */ + if (page_cnt == page_pos || page_cnt == page_pos + 1) { + /* + * Check if the offset matches either the first or second + * tail copy. It is possible it will match both + */ + if (curpage_off == final_off) + tail_page = first_tail; + + /* + * If we already matched on the first page then + * check the ending lsn's. + */ + if (curpage_off == second_off) { + if (!tail_page || + (second_tail && + le64_to_cpu(second_tail->record_hdr.last_end_lsn) > + le64_to_cpu(first_tail->record_hdr + .last_end_lsn))) { + tail_page = second_tail; + } + } + } + +use_tail_page: + if (tail_page) { + /* we have a candidate for a tail copy */ + lsn_cur = le64_to_cpu(tail_page->record_hdr.last_end_lsn); + + if (last_ok_lsn < lsn_cur) { + /* + * If the sequence number is not expected, + * then don't use the tail copy + */ + if (expected_seq != (lsn_cur >> log->file_data_bits)) + tail_page = NULL; + } else if (last_ok_lsn > lsn_cur) { + /* + * If the last lsn is greater than the one on + * this page then forget this tail + */ + tail_page = NULL; + } + } + + /* If we have an error on the current page, we will break of this loop */ + if (err || usa_error) + goto check_tail; + + /* + * Done if the last lsn on this page doesn't match the previous known + * last lsn or the sequence number is not expected + */ + lsn_cur = le64_to_cpu(page->rhdr.lsn); + if (last_ok_lsn != lsn_cur && + expected_seq != (lsn_cur >> log->file_data_bits)) { + goto check_tail; + } + + /* + * Check that the page position and page count values are correct + * If this is the first page of a transfer the position must be 1 + * and the count will be unknown + */ + if (page_cnt == page_pos) { + if (page->page_pos != cpu_to_le16(1) && + (!reuse_page || page->page_pos != page->page_count)) { + /* + * If the current page is the first page we are + * looking at and we are reusing this page then + * it can be either the first or last page of a + * transfer. Otherwise it can only be the first. + */ + goto check_tail; + } + } else if (le16_to_cpu(page->page_count) != page_cnt || + le16_to_cpu(page->page_pos) != page_pos + 1) { + /* + * The page position better be 1 more than the last page + * position and the page count better match + */ + goto check_tail; + } + + /* + * We have a valid page the file and may have a valid page + * the tail copy area + * If the tail page was written after the page the file then + * break of the loop + */ + if (tail_page && + le64_to_cpu(tail_page->record_hdr.last_end_lsn) > lsn_cur) { + /* Remember if we will replace the page */ + replace_page = true; + goto check_tail; + } + + tail_page = NULL; + + if (is_log_record_end(page)) { + /* + * Since we have read this page we know the sequence number + * is the same as our expected value + */ + log->seq_num = expected_seq; + log->last_lsn = le64_to_cpu(page->record_hdr.last_end_lsn); + log->ra->current_lsn = page->record_hdr.last_end_lsn; + log->l_flags &= ~NTFSLOG_NO_LAST_LSN; + + /* + * If there is room on this page for another header then + * remember we want to reuse the page + */ + if (log->record_header_len <= + log->page_size - + le16_to_cpu(page->record_hdr.next_record_off)) { + log->l_flags |= NTFSLOG_REUSE_TAIL; + log->next_page = curpage_off; + } else { + log->l_flags &= ~NTFSLOG_REUSE_TAIL; + log->next_page = nextpage_off; + } + + /* Remember if we wrapped the log file */ + if (wrapped_file) + log->l_flags |= NTFSLOG_WRAPPED; + } + + /* + * Remember the last page count and position. + * Also remember the last known lsn + */ + page_cnt = le16_to_cpu(page->page_count); + page_pos = le16_to_cpu(page->page_pos); + last_ok_lsn = le64_to_cpu(page->rhdr.lsn); + +next_page_1: + + if (wrapped) { + expected_seq += 1; + wrapped_file = 1; + } + + curpage_off = nextpage_off; + ntfs_free(page); + page = NULL; + reuse_page = 0; + goto next_page; + +check_tail: + if (tail_page) { + log->seq_num = expected_seq; + log->last_lsn = le64_to_cpu(tail_page->record_hdr.last_end_lsn); + log->ra->current_lsn = tail_page->record_hdr.last_end_lsn; + log->l_flags &= ~NTFSLOG_NO_LAST_LSN; + + if (log->page_size - + le16_to_cpu( + tail_page->record_hdr.next_record_off) >= + log->record_header_len) { + log->l_flags |= NTFSLOG_REUSE_TAIL; + log->next_page = curpage_off; + } else { + log->l_flags &= ~NTFSLOG_REUSE_TAIL; + log->next_page = nextpage_off; + } + + if (wrapped) + log->l_flags |= NTFSLOG_WRAPPED; + } + + /* Remember that the partial IO will start at the next page */ + second_off = nextpage_off; + + /* + * If the next page is the first page of the file then update + * the sequence number for log records which begon the next page + */ + if (wrapped) + expected_seq += 1; + + /* + * If we have a tail copy or are performing single page I/O we can + * immediately look at the next page + */ + if (replace_page || (log->ra->flags & RESTART_SINGLE_PAGE_IO)) { + page_cnt = 2; + page_pos = 1; + goto check_valid; + } + + if (page_pos != page_cnt) + goto check_valid; + /* + * If the next page causes us to wrap to the beginning of the log + * file then we know which page to check next. + */ + if (wrapped) { + page_cnt = 2; + page_pos = 1; + goto check_valid; + } + + cur_pos = 2; + +next_test_page: + ntfs_free(tst_page); + tst_page = NULL; + + /* Walk through the file, reading log pages */ + err = read_log_page(log, nextpage_off, &tst_page, &usa_error); + + /* + * If we get a USA error then assume that we correctly found + * the end of the original transfer + */ + if (usa_error) + goto file_is_valid; + + /* + * If we were able to read the page, we examine it to see if it + * is the same or different Io block + */ + if (err) + goto next_test_page_1; + + if (le16_to_cpu(tst_page->page_pos) == cur_pos && + check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) { + page_cnt = le16_to_cpu(tst_page->page_count) + 1; + page_pos = le16_to_cpu(tst_page->page_pos); + goto check_valid; + } else { + goto file_is_valid; + } + +next_test_page_1: + + nextpage_off = next_page_off(log, curpage_off); + wrapped = nextpage_off == log->first_page; + + if (wrapped) { + expected_seq += 1; + page_cnt = 2; + page_pos = 1; + } + + cur_pos += 1; + part_io_count += 1; + if (!wrapped) + goto next_test_page; + +check_valid: + /* Skip over the remaining pages this transfer */ + remain_pages = page_cnt - page_pos - 1; + part_io_count += remain_pages; + + while (remain_pages--) { + nextpage_off = next_page_off(log, curpage_off); + wrapped = nextpage_off == log->first_page; + + if (wrapped) + expected_seq += 1; + } + + /* Call our routine to check this log page */ + ntfs_free(tst_page); + tst_page = NULL; + + err = read_log_page(log, nextpage_off, &tst_page, &usa_error); + if (!err && !usa_error && + check_subseq_log_page(log, tst_page, nextpage_off, expected_seq)) { + err = -EINVAL; + goto out; + } + +file_is_valid: + + /* We have a valid file */ + if (page_off1 || tail_page) { + struct RECORD_PAGE_HDR *tmp_page; + + if (sb_rdonly(log->ni->mi.sbi->sb)) { + err = -EROFS; + goto out; + } + + if (page_off1) { + tmp_page = Add2Ptr(page_bufs, page_off1 - page_off); + tails -= (page_off1 - page_off) / log->page_size; + if (!tail_page) + tails -= 1; + } else { + tmp_page = tail_page; + tails = 1; + } + + while (tails--) { + u64 off = hdr_file_off(log, tmp_page); + + if (!page) { + page = ntfs_malloc(log->page_size); + if (!page) + return -ENOMEM; + } + + /* + * Correct page and copy the data from this page + * into it and flush it to disk + */ + memcpy(page, tmp_page, log->page_size); + + /* Fill last flushed lsn value flush the page */ + if (log->major_ver < 2) + page->rhdr.lsn = page->record_hdr.last_end_lsn; + else + page->file_off = 0; + + page->page_pos = page->page_count = cpu_to_le16(1); + + ntfs_fix_pre_write(&page->rhdr, log->page_size); + + err = ntfs_sb_write_run(log->ni->mi.sbi, + &log->ni->file.run, off, page, + log->page_size); + + if (err) + goto out; + + if (part_io_count && second_off == off) { + second_off += log->page_size; + part_io_count -= 1; + } + + tmp_page = Add2Ptr(tmp_page, log->page_size); + } + } + + if (part_io_count) { + if (sb_rdonly(log->ni->mi.sbi->sb)) { + err = -EROFS; + goto out; + } + } + +out: + ntfs_free(second_tail); + ntfs_free(first_tail); + ntfs_free(page); + ntfs_free(tst_page); + ntfs_free(page_bufs); + + return err; +} + +/* + * read_log_rec_buf + * + * copies a log record from the file to a buffer + * The log record may span several log pages and may even wrap the file + */ +static int read_log_rec_buf(struct ntfs_log *log, + const struct LFS_RECORD_HDR *rh, void *buffer) +{ + int err; + struct RECORD_PAGE_HDR *ph = NULL; + u64 lsn = le64_to_cpu(rh->this_lsn); + u32 vbo = lsn_to_vbo(log, lsn) & ~log->page_mask; + u32 off = lsn_to_page_off(log, lsn) + log->record_header_len; + u32 data_len = le32_to_cpu(rh->client_data_len); + + /* + * While there are more bytes to transfer, + * we continue to attempt to perform the read + */ + for (;;) { + bool usa_error; + u32 tail = log->page_size - off; + + if (tail >= data_len) + tail = data_len; + + data_len -= tail; + + err = read_log_page(log, vbo, &ph, &usa_error); + if (err) + goto out; + + /* + * The last lsn on this page better be greater or equal + * to the lsn we are copying + */ + if (lsn > le64_to_cpu(ph->rhdr.lsn)) { + err = -EINVAL; + goto out; + } + + memcpy(buffer, Add2Ptr(ph, off), tail); + + /* If there are no more bytes to transfer, we exit the loop */ + if (!data_len) { + if (!is_log_record_end(ph) || + lsn > le64_to_cpu(ph->record_hdr.last_end_lsn)) { + err = -EINVAL; + goto out; + } + break; + } + + if (ph->rhdr.lsn == ph->record_hdr.last_end_lsn || + lsn > le64_to_cpu(ph->rhdr.lsn)) { + err = -EINVAL; + goto out; + } + + vbo = next_page_off(log, vbo); + off = log->data_off; + + /* + * adjust our pointer the user's buffer to transfer + * the next block to + */ + buffer = Add2Ptr(buffer, tail); + } + +out: + ntfs_free(ph); + return err; +} + +static int read_rst_area(struct ntfs_log *log, struct NTFS_RESTART **rst_, + u64 *lsn) +{ + int err; + struct LFS_RECORD_HDR *rh = NULL; + const struct CLIENT_REC *cr = + Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)); + u64 lsnr, lsnc = le64_to_cpu(cr->restart_lsn); + u32 len; + struct NTFS_RESTART *rst; + + *lsn = 0; + *rst_ = NULL; + + /* If the client doesn't have a restart area, go ahead and exit now */ + if (!lsnc) + return 0; + + err = read_log_page(log, lsn_to_vbo(log, lsnc), + (struct RECORD_PAGE_HDR **)&rh, NULL); + if (err) + return err; + + rst = NULL; + lsnr = le64_to_cpu(rh->this_lsn); + + if (lsnc != lsnr) { + /* If the lsn values don't match, then the disk is corrupt */ + err = -EINVAL; + goto out; + } + + *lsn = lsnr; + len = le32_to_cpu(rh->client_data_len); + + if (!len) { + err = 0; + goto out; + } + + if (len < sizeof(struct NTFS_RESTART)) { + err = -EINVAL; + goto out; + } + + rst = ntfs_malloc(len); + if (!rst) { + err = -ENOMEM; + goto out; + } + + /* Copy the data into the 'rst' buffer */ + err = read_log_rec_buf(log, rh, rst); + if (err) + goto out; + + *rst_ = rst; + rst = NULL; + +out: + ntfs_free(rh); + ntfs_free(rst); + + return err; +} + +static int find_log_rec(struct ntfs_log *log, u64 lsn, struct lcb *lcb) +{ + int err; + struct LFS_RECORD_HDR *rh = lcb->lrh; + u32 rec_len, len; + + /* Read the record header for this lsn */ + if (!rh) { + err = read_log_page(log, lsn_to_vbo(log, lsn), + (struct RECORD_PAGE_HDR **)&rh, NULL); + + lcb->lrh = rh; + if (err) + return err; + } + + /* + * If the lsn the log record doesn't match the desired + * lsn then the disk is corrupt + */ + if (lsn != le64_to_cpu(rh->this_lsn)) + return -EINVAL; + + len = le32_to_cpu(rh->client_data_len); + + /* + * check that the length field isn't greater than the total + * available space the log file + */ + rec_len = len + log->record_header_len; + if (rec_len >= log->total_avail) + return -EINVAL; + + /* + * If the entire log record is on this log page, + * put a pointer to the log record the context block + */ + if (rh->flags & LOG_RECORD_MULTI_PAGE) { + void *lr = ntfs_malloc(len); + + if (!lr) + return -ENOMEM; + + lcb->log_rec = lr; + lcb->alloc = true; + + /* Copy the data into the buffer returned */ + err = read_log_rec_buf(log, rh, lr); + if (err) + return err; + } else { + /* If beyond the end of the current page -> an error */ + u32 page_off = lsn_to_page_off(log, lsn); + + if (page_off + len + log->record_header_len > log->page_size) + return -EINVAL; + + lcb->log_rec = Add2Ptr(rh, sizeof(struct LFS_RECORD_HDR)); + lcb->alloc = false; + } + + return 0; +} + +/* + * read_log_rec_lcb + * + * initiates the query operation. + */ +static int read_log_rec_lcb(struct ntfs_log *log, u64 lsn, u32 ctx_mode, + struct lcb **lcb_) +{ + int err; + const struct CLIENT_REC *cr; + struct lcb *lcb; + + switch (ctx_mode) { + case lcb_ctx_undo_next: + case lcb_ctx_prev: + case lcb_ctx_next: + break; + default: + return -EINVAL; + } + + /* check that the given lsn is the legal range for this client */ + cr = Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)); + + if (!verify_client_lsn(log, cr, lsn)) + return -EINVAL; + + lcb = ntfs_zalloc(sizeof(struct lcb)); + if (!lcb) + return -ENOMEM; + lcb->client = log->client_id; + lcb->ctx_mode = ctx_mode; + + /* Find the log record indicated by the given lsn */ + err = find_log_rec(log, lsn, lcb); + if (err) + goto out; + + *lcb_ = lcb; + return 0; + +out: + lcb_put(lcb); + *lcb_ = NULL; + return err; +} + +/* + * find_client_next_lsn + * + * attempt to find the next lsn to return to a client based on the context mode. + */ +static int find_client_next_lsn(struct ntfs_log *log, struct lcb *lcb, u64 *lsn) +{ + int err; + u64 next_lsn; + struct LFS_RECORD_HDR *hdr; + + hdr = lcb->lrh; + *lsn = 0; + + if (lcb_ctx_next != lcb->ctx_mode) + goto check_undo_next; + + /* Loop as long as another lsn can be found */ + for (;;) { + u64 current_lsn; + + err = next_log_lsn(log, hdr, ¤t_lsn); + if (err) + goto out; + + if (!current_lsn) + break; + + if (hdr != lcb->lrh) + ntfs_free(hdr); + + hdr = NULL; + err = read_log_page(log, lsn_to_vbo(log, current_lsn), + (struct RECORD_PAGE_HDR **)&hdr, NULL); + if (err) + goto out; + + if (memcmp(&hdr->client, &lcb->client, + sizeof(struct CLIENT_ID))) { + /*err = -EINVAL; */ + } else if (LfsClientRecord == hdr->record_type) { + ntfs_free(lcb->lrh); + lcb->lrh = hdr; + *lsn = current_lsn; + return 0; + } + } + +out: + if (hdr != lcb->lrh) + ntfs_free(hdr); + return err; + +check_undo_next: + if (lcb_ctx_undo_next == lcb->ctx_mode) + next_lsn = le64_to_cpu(hdr->client_undo_next_lsn); + else if (lcb_ctx_prev == lcb->ctx_mode) + next_lsn = le64_to_cpu(hdr->client_prev_lsn); + else + return 0; + + if (!next_lsn) + return 0; + + if (!verify_client_lsn( + log, Add2Ptr(log->ra, le16_to_cpu(log->ra->client_off)), + next_lsn)) + return 0; + + hdr = NULL; + err = read_log_page(log, lsn_to_vbo(log, next_lsn), + (struct RECORD_PAGE_HDR **)&hdr, NULL); + if (err) + return err; + ntfs_free(lcb->lrh); + lcb->lrh = hdr; + + *lsn = next_lsn; + + return 0; +} + +static int read_next_log_rec(struct ntfs_log *log, struct lcb *lcb, u64 *lsn) +{ + int err; + + err = find_client_next_lsn(log, lcb, lsn); + if (err) + return err; + + if (!*lsn) + return 0; + + if (lcb->alloc) + ntfs_free(lcb->log_rec); + + lcb->log_rec = NULL; + lcb->alloc = false; + ntfs_free(lcb->lrh); + lcb->lrh = NULL; + + return find_log_rec(log, *lsn, lcb); +} + +static inline bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes) +{ + __le16 mask; + u32 min_de, de_off, used, total; + const struct NTFS_DE *e; + + if (hdr_has_subnode(hdr)) { + min_de = sizeof(struct NTFS_DE) + sizeof(u64); + mask = NTFS_IE_HAS_SUBNODES; + } else { + min_de = sizeof(struct NTFS_DE); + mask = 0; + } + + de_off = le32_to_cpu(hdr->de_off); + used = le32_to_cpu(hdr->used); + total = le32_to_cpu(hdr->total); + + if (de_off > bytes - min_de || used > bytes || total > bytes || + de_off + min_de > used || used > total) { + return false; + } + + e = Add2Ptr(hdr, de_off); + for (;;) { + u16 esize = le16_to_cpu(e->size); + struct NTFS_DE *next = Add2Ptr(e, esize); + + if (esize < min_de || PtrOffset(hdr, next) > used || + (e->flags & NTFS_IE_HAS_SUBNODES) != mask) { + return false; + } + + if (de_is_last(e)) + break; + + e = next; + } + + return true; +} + +static inline bool check_index_buffer(const struct INDEX_BUFFER *ib, u32 bytes) +{ + u16 fo; + const struct NTFS_RECORD_HEADER *r = &ib->rhdr; + + if (r->sign != NTFS_INDX_SIGNATURE) + return false; + + fo = (SECTOR_SIZE - ((bytes >> SECTOR_SHIFT) + 1) * sizeof(short)); + + if (le16_to_cpu(r->fix_off) > fo) + return false; + + if ((le16_to_cpu(r->fix_num) - 1) * SECTOR_SIZE != bytes) + return false; + + return check_index_header(&ib->ihdr, + bytes - offsetof(struct INDEX_BUFFER, ihdr)); +} + +static inline bool check_index_root(const struct ATTRIB *attr, + struct ntfs_sb_info *sbi) +{ + bool ret; + const struct INDEX_ROOT *root = resident_data(attr); + u8 index_bits = le32_to_cpu(root->index_block_size) >= sbi->cluster_size + ? sbi->cluster_bits + : SECTOR_SHIFT; + u8 block_clst = root->index_block_clst; + + if (le32_to_cpu(attr->res.data_size) < sizeof(struct INDEX_ROOT) || + (root->type != ATTR_NAME && root->type != ATTR_ZERO) || + (root->type == ATTR_NAME && + root->rule != NTFS_COLLATION_TYPE_FILENAME) || + (le32_to_cpu(root->index_block_size) != + (block_clst << index_bits)) || + (block_clst != 1 && block_clst != 2 && block_clst != 4 && + block_clst != 8 && block_clst != 0x10 && block_clst != 0x20 && + block_clst != 0x40 && block_clst != 0x80)) { + return false; + } + + ret = check_index_header(&root->ihdr, + le32_to_cpu(attr->res.data_size) - + offsetof(struct INDEX_ROOT, ihdr)); + return ret; +} + +static inline bool check_attr(const struct MFT_REC *rec, + const struct ATTRIB *attr, + struct ntfs_sb_info *sbi) +{ + u32 asize = le32_to_cpu(attr->size); + u32 rsize = 0; + u64 dsize, svcn, evcn; + u16 run_off; + + /* Check the fixed part of the attribute record header */ + if (asize >= sbi->record_size || + asize + PtrOffset(rec, attr) >= sbi->record_size || + (attr->name_len && + le16_to_cpu(attr->name_off) + attr->name_len * sizeof(short) > + asize)) { + return false; + } + + /* Check the attribute fields */ + switch (attr->non_res) { + case 0: + rsize = le32_to_cpu(attr->res.data_size); + if (rsize >= asize || + le16_to_cpu(attr->res.data_off) + rsize > asize) { + return false; + } + break; + + case 1: + dsize = le64_to_cpu(attr->nres.data_size); + svcn = le64_to_cpu(attr->nres.svcn); + evcn = le64_to_cpu(attr->nres.evcn); + run_off = le16_to_cpu(attr->nres.run_off); + + if (svcn > evcn + 1 || run_off >= asize || + le64_to_cpu(attr->nres.valid_size) > dsize || + dsize > le64_to_cpu(attr->nres.alloc_size)) { + return false; + } + + if (run_unpack(NULL, sbi, 0, svcn, evcn, svcn, + Add2Ptr(attr, run_off), asize - run_off) < 0) { + return false; + } + + return true; + + default: + return false; + } + + switch (attr->type) { + case ATTR_NAME: + if (fname_full_size(Add2Ptr( + attr, le16_to_cpu(attr->res.data_off))) > asize) { + return false; + } + break; + + case ATTR_ROOT: + return check_index_root(attr, sbi); + + case ATTR_STD: + if (rsize < sizeof(struct ATTR_STD_INFO5) && + rsize != sizeof(struct ATTR_STD_INFO)) { + return false; + } + break; + + case ATTR_LIST: + case ATTR_ID: + case ATTR_SECURE: + case ATTR_LABEL: + case ATTR_VOL_INFO: + case ATTR_DATA: + case ATTR_ALLOC: + case ATTR_BITMAP: + case ATTR_REPARSE: + case ATTR_EA_INFO: + case ATTR_EA: + case ATTR_PROPERTYSET: + case ATTR_LOGGED_UTILITY_STREAM: + break; + + default: + return false; + } + + return true; +} + +static inline bool check_file_record(const struct MFT_REC *rec, + const struct MFT_REC *rec2, + struct ntfs_sb_info *sbi) +{ + const struct ATTRIB *attr; + u16 fo = le16_to_cpu(rec->rhdr.fix_off); + u16 fn = le16_to_cpu(rec->rhdr.fix_num); + u16 ao = le16_to_cpu(rec->attr_off); + u32 rs = sbi->record_size; + + /* check the file record header for consistency */ + if (rec->rhdr.sign != NTFS_FILE_SIGNATURE || + fo > (SECTOR_SIZE - ((rs >> SECTOR_SHIFT) + 1) * sizeof(short)) || + (fn - 1) * SECTOR_SIZE != rs || ao < MFTRECORD_FIXUP_OFFSET_1 || + ao > sbi->record_size - SIZEOF_RESIDENT || !is_rec_inuse(rec) || + le32_to_cpu(rec->total) != rs) { + return false; + } + + /* Loop to check all of the attributes */ + for (attr = Add2Ptr(rec, ao); attr->type != ATTR_END; + attr = Add2Ptr(attr, le32_to_cpu(attr->size))) { + if (check_attr(rec, attr, sbi)) + continue; + return false; + } + + return true; +} + +static inline int check_lsn(const struct NTFS_RECORD_HEADER *hdr, + const u64 *rlsn) +{ + u64 lsn; + + if (!rlsn) + return true; + + lsn = le64_to_cpu(hdr->lsn); + + if (hdr->sign == NTFS_HOLE_SIGNATURE) + return false; + + if (*rlsn > lsn) + return true; + + return false; +} + +static inline bool check_if_attr(const struct MFT_REC *rec, + const struct LOG_REC_HDR *lrh) +{ + u16 ro = le16_to_cpu(lrh->record_off); + u16 o = le16_to_cpu(rec->attr_off); + const struct ATTRIB *attr = Add2Ptr(rec, o); + + while (o < ro) { + u32 asize; + + if (attr->type == ATTR_END) + break; + + asize = le32_to_cpu(attr->size); + if (!asize) + break; + + o += asize; + attr = Add2Ptr(attr, asize); + } + + return o == ro; +} + +static inline bool check_if_index_root(const struct MFT_REC *rec, + const struct LOG_REC_HDR *lrh) +{ + u16 ro = le16_to_cpu(lrh->record_off); + u16 o = le16_to_cpu(rec->attr_off); + const struct ATTRIB *attr = Add2Ptr(rec, o); + + while (o < ro) { + u32 asize; + + if (attr->type == ATTR_END) + break; + + asize = le32_to_cpu(attr->size); + if (!asize) + break; + + o += asize; + attr = Add2Ptr(attr, asize); + } + + return o == ro && attr->type == ATTR_ROOT; +} + +static inline bool check_if_root_index(const struct ATTRIB *attr, + const struct INDEX_HDR *hdr, + const struct LOG_REC_HDR *lrh) +{ + u16 ao = le16_to_cpu(lrh->attr_off); + u32 de_off = le32_to_cpu(hdr->de_off); + u32 o = PtrOffset(attr, hdr) + de_off; + const struct NTFS_DE *e = Add2Ptr(hdr, de_off); + u32 asize = le32_to_cpu(attr->size); + + while (o < ao) { + u16 esize; + + if (o >= asize) + break; + + esize = le16_to_cpu(e->size); + if (!esize) + break; + + o += esize; + e = Add2Ptr(e, esize); + } + + return o == ao; +} + +static inline bool check_if_alloc_index(const struct INDEX_HDR *hdr, + u32 attr_off) +{ + u32 de_off = le32_to_cpu(hdr->de_off); + u32 o = offsetof(struct INDEX_BUFFER, ihdr) + de_off; + const struct NTFS_DE *e = Add2Ptr(hdr, de_off); + u32 used = le32_to_cpu(hdr->used); + + while (o < attr_off) { + u16 esize; + + if (de_off >= used) + break; + + esize = le16_to_cpu(e->size); + if (!esize) + break; + + o += esize; + de_off += esize; + e = Add2Ptr(e, esize); + } + + return o == attr_off; +} + +static inline void change_attr_size(struct MFT_REC *rec, struct ATTRIB *attr, + u32 nsize) +{ + u32 asize = le32_to_cpu(attr->size); + int dsize = nsize - asize; + u8 *next = Add2Ptr(attr, asize); + u32 used = le32_to_cpu(rec->used); + + memmove(Add2Ptr(attr, nsize), next, used - PtrOffset(rec, next)); + + rec->used = cpu_to_le32(used + dsize); + attr->size = cpu_to_le32(nsize); +} + +struct OpenAttr { + struct ATTRIB *attr; + struct runs_tree *run1; + struct runs_tree run0; + struct ntfs_inode *ni; + // CLST rno; +}; + +/* Returns 0 if 'attr' has the same type and name */ +static inline int cmp_type_and_name(const struct ATTRIB *a1, + const struct ATTRIB *a2) +{ + return a1->type != a2->type || a1->name_len != a2->name_len || + (a1->name_len && memcmp(attr_name(a1), attr_name(a2), + a1->name_len * sizeof(short))); +} + +static struct OpenAttr *find_loaded_attr(struct ntfs_log *log, + const struct ATTRIB *attr, CLST rno) +{ + struct OPEN_ATTR_ENRTY *oe = NULL; + + while ((oe = enum_rstbl(log->open_attr_tbl, oe))) { + struct OpenAttr *op_attr; + + if (ino_get(&oe->ref) != rno) + continue; + + op_attr = (struct OpenAttr *)oe->ptr; + if (!cmp_type_and_name(op_attr->attr, attr)) + return op_attr; + } + return NULL; +} + +static struct ATTRIB *attr_create_nonres_log(struct ntfs_sb_info *sbi, + enum ATTR_TYPE type, u64 size, + const u16 *name, size_t name_len, + __le16 flags) +{ + struct ATTRIB *attr; + u32 name_size = QuadAlign(name_len * sizeof(short)); + bool is_ext = flags & (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED); + u32 asize = name_size + + (is_ext ? SIZEOF_NONRESIDENT_EX : SIZEOF_NONRESIDENT); + + attr = ntfs_zalloc(asize); + if (!attr) + return NULL; + + attr->type = type; + attr->size = cpu_to_le32(asize); + attr->flags = flags; + attr->non_res = 1; + attr->name_len = name_len; + + attr->nres.evcn = cpu_to_le64((u64)bytes_to_cluster(sbi, size) - 1); + attr->nres.alloc_size = cpu_to_le64(ntfs_up_cluster(sbi, size)); + attr->nres.data_size = cpu_to_le64(size); + attr->nres.valid_size = attr->nres.data_size; + if (is_ext) { + attr->name_off = SIZEOF_NONRESIDENT_EX_LE; + if (is_attr_compressed(attr)) + attr->nres.c_unit = COMPRESSION_UNIT; + + attr->nres.run_off = + cpu_to_le16(SIZEOF_NONRESIDENT_EX + name_size); + memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT_EX), name, + name_len * sizeof(short)); + } else { + attr->name_off = SIZEOF_NONRESIDENT_LE; + attr->nres.run_off = + cpu_to_le16(SIZEOF_NONRESIDENT + name_size); + memcpy(Add2Ptr(attr, SIZEOF_NONRESIDENT), name, + name_len * sizeof(short)); + } + + return attr; +} + +/* + * do_action + * + * common routine for the Redo and Undo Passes + * If rlsn is NULL then undo + */ +static int do_action(struct ntfs_log *log, struct OPEN_ATTR_ENRTY *oe, + const struct LOG_REC_HDR *lrh, u32 op, void *data, + u32 dlen, u32 rec_len, const u64 *rlsn) +{ + int err = 0; + struct ntfs_sb_info *sbi = log->ni->mi.sbi; + struct inode *inode = NULL, *inode_parent; + struct mft_inode *mi = NULL, *mi2_child = NULL; + CLST rno = 0, rno_base = 0; + struct INDEX_BUFFER *ib = NULL; + struct MFT_REC *rec = NULL; + struct ATTRIB *attr = NULL, *attr2; + struct INDEX_HDR *hdr; + struct INDEX_ROOT *root; + struct NTFS_DE *e, *e1, *e2; + struct NEW_ATTRIBUTE_SIZES *new_sz; + struct ATTR_FILE_NAME *fname; + struct OpenAttr *oa, *oa2; + u32 nsize, t32, asize, used, esize, bmp_off, bmp_bits; + u16 t16, id, id2; + u32 record_size = sbi->record_size; + u64 t64; + u64 lco = 0; + u64 cbo = (u64)le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT; + u64 tvo = le64_to_cpu(lrh->target_vcn) << sbi->cluster_bits; + u64 vbo = cbo + tvo; + void *buffer_le = NULL; + u32 bytes = 0; + bool a_dirty = false; + u16 data_off; + + oa = oe->ptr; + + /* Big switch to prepare */ + switch (op) { + /* ============================================================ + * Process MFT records, as described by the current log record + * ============================================================ + */ + case InitializeFileRecordSegment: + case DeallocateFileRecordSegment: + case WriteEndOfFileRecordSegment: + case CreateAttribute: + case DeleteAttribute: + case UpdateResidentValue: + case UpdateMappingPairs: + case SetNewAttributeSizes: + case AddIndexEntryRoot: + case DeleteIndexEntryRoot: + case SetIndexEntryVcnRoot: + case UpdateFileNameRoot: + case UpdateRecordDataRoot: + case ZeroEndOfFileRecord: + + rno = vbo >> sbi->record_bits; + inode = ilookup(sbi->sb, rno); + if (inode) { + mi = &ntfs_i(inode)->mi; + } else if (op == InitializeFileRecordSegment) { + mi = ntfs_zalloc(sizeof(struct mft_inode)); + if (!mi) + return -ENOMEM; + err = mi_format_new(mi, sbi, rno, 0, false); + if (err) + goto out; + } else { + /* read from disk */ + err = mi_get(sbi, rno, &mi); + if (err) + return err; + } + rec = mi->mrec; + + if (op == DeallocateFileRecordSegment) + goto skip_load_parent; + + if (InitializeFileRecordSegment != op) { + if (rec->rhdr.sign == NTFS_BAAD_SIGNATURE) + goto dirty_vol; + if (!check_lsn(&rec->rhdr, rlsn)) + goto out; + if (!check_file_record(rec, NULL, sbi)) + goto dirty_vol; + attr = Add2Ptr(rec, le16_to_cpu(lrh->record_off)); + } + + if (is_rec_base(rec) || InitializeFileRecordSegment == op) { + rno_base = rno; + goto skip_load_parent; + } + + rno_base = ino_get(&rec->parent_ref); + inode_parent = ntfs_iget5(sbi->sb, &rec->parent_ref, NULL); + if (IS_ERR(inode_parent)) + goto skip_load_parent; + + if (is_bad_inode(inode_parent)) { + iput(inode_parent); + goto skip_load_parent; + } + + if (ni_load_mi_ex(ntfs_i(inode_parent), rno, &mi2_child)) { + iput(inode_parent); + } else { + if (mi2_child->mrec != mi->mrec) + memcpy(mi2_child->mrec, mi->mrec, + sbi->record_size); + + if (inode) + iput(inode); + else if (mi) + mi_put(mi); + + inode = inode_parent; + mi = mi2_child; + rec = mi2_child->mrec; + attr = Add2Ptr(rec, le16_to_cpu(lrh->record_off)); + } + +skip_load_parent: + inode_parent = NULL; + break; + + /* ============================================================ + * Process attributes, as described by the current log record + * ============================================================ + */ + case UpdateNonresidentValue: + case AddIndexEntryAllocation: + case DeleteIndexEntryAllocation: + case WriteEndOfIndexBuffer: + case SetIndexEntryVcnAllocation: + case UpdateFileNameAllocation: + case SetBitsInNonresidentBitMap: + case ClearBitsInNonresidentBitMap: + case UpdateRecordDataAllocation: + + attr = oa->attr; + bytes = UpdateNonresidentValue == op ? dlen : 0; + lco = (u64)le16_to_cpu(lrh->lcns_follow) << sbi->cluster_bits; + + if (attr->type == ATTR_ALLOC) { + t32 = le32_to_cpu(oe->bytes_per_index); + if (bytes < t32) + bytes = t32; + } + + if (!bytes) + bytes = lco - cbo; + + bytes += le16_to_cpu(lrh->record_off); + if (attr->type == ATTR_ALLOC) + bytes = (bytes + 511) & ~511; // align + + buffer_le = ntfs_malloc(bytes); + if (!buffer_le) + return -ENOMEM; + + err = ntfs_read_run_nb(sbi, oa->run1, vbo, buffer_le, bytes, + NULL); + if (err) + goto out; + + if (attr->type == ATTR_ALLOC && *(int *)buffer_le) + ntfs_fix_post_read(buffer_le, bytes, false); + break; + + default: + WARN_ON(1); + } + + /* Big switch to do operation */ + switch (op) { + case InitializeFileRecordSegment: + t16 = le16_to_cpu(lrh->record_off); + if (t16 + dlen > record_size) + goto dirty_vol; + + memcpy(Add2Ptr(rec, t16), data, dlen); + mi->dirty = true; + break; + + case DeallocateFileRecordSegment: + clear_rec_inuse(rec); + le16_add_cpu(&rec->seq, 1); + mi->dirty = true; + break; + + case WriteEndOfFileRecordSegment: + attr2 = (struct ATTRIB *)data; + t16 = le16_to_cpu(lrh->record_off); + + if (!check_if_attr(rec, lrh) || t16 + dlen > record_size) + goto dirty_vol; + + memmove(attr, attr2, dlen); + rec->used = cpu_to_le32(QuadAlign(t16 + dlen)); + + mi->dirty = true; + break; + + case CreateAttribute: + attr2 = (struct ATTRIB *)data; + asize = le32_to_cpu(attr2->size); + used = le32_to_cpu(rec->used); + t16 = le16_to_cpu(lrh->record_off); + + if (!check_if_attr(rec, lrh) || dlen < SIZEOF_RESIDENT || + !IsQuadAligned(asize) || + Add2Ptr(attr2, asize) > Add2Ptr(lrh, rec_len) || + dlen > record_size - used) { + goto dirty_vol; + } + + memmove(Add2Ptr(attr, asize), attr, used - t16); + memcpy(attr, attr2, asize); + + rec->used = cpu_to_le32(used + asize); + id = le16_to_cpu(rec->next_attr_id); + id2 = le16_to_cpu(attr2->id); + if (id <= id2) + rec->next_attr_id = cpu_to_le16(id2 + 1); + if (is_attr_indexed(attr)) + le16_add_cpu(&rec->hard_links, 1); + + oa2 = find_loaded_attr(log, attr, rno_base); + if (oa2) { + void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size)); + + if (p2) { + // run_close(oa2->run1); + ntfs_free(oa2->attr); + oa2->attr = p2; + } + } + + mi->dirty = true; + break; + + case DeleteAttribute: + asize = le32_to_cpu(attr->size); + used = le32_to_cpu(rec->used); + t16 = le16_to_cpu(lrh->record_off); + + if (!check_if_attr(rec, lrh)) + goto dirty_vol; + + rec->used = cpu_to_le32(used - asize); + if (is_attr_indexed(attr)) + le16_add_cpu(&rec->hard_links, -1); + + memmove(attr, Add2Ptr(attr, asize), used - t16); + + mi->dirty = true; + break; + + case UpdateResidentValue: + t16 = le16_to_cpu(lrh->attr_off); + nsize = t16 + dlen; + + if (!check_if_attr(rec, lrh)) + goto dirty_vol; + + asize = le32_to_cpu(attr->size); + used = le32_to_cpu(rec->used); + + if (lrh->redo_len == lrh->undo_len) { + if (nsize > asize) + goto dirty_vol; + goto move_data; + } + + if (nsize > asize && nsize - asize > record_size - used) + goto dirty_vol; + + nsize = QuadAlign(nsize); + data_off = le16_to_cpu(attr->res.data_off); + + if (nsize < asize) { + memmove(Add2Ptr(attr, t16), data, dlen); + data = NULL; // To skip below memmove + } + + memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize), + used - le16_to_cpu(lrh->record_off) - asize); + + rec->used = cpu_to_le32(used + nsize - asize); + attr->size = cpu_to_le32(nsize); + attr->res.data_size = cpu_to_le32(t16 + dlen - data_off); + +move_data: + if (data) + memmove(Add2Ptr(attr, t16), data, dlen); + + oa2 = find_loaded_attr(log, attr, rno_base); + if (oa2) { + void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size)); + + if (p2) { + // run_close(&oa2->run0); + oa2->run1 = &oa2->run0; + ntfs_free(oa2->attr); + oa2->attr = p2; + } + } + + mi->dirty = true; + break; + + case UpdateMappingPairs: + t16 = le16_to_cpu(lrh->attr_off); + nsize = t16 + dlen; + asize = le32_to_cpu(attr->size); + used = le32_to_cpu(rec->used); + + if (!check_if_attr(rec, lrh) || !attr->non_res || + t16 < le16_to_cpu(attr->nres.run_off) || t16 > asize || + (nsize > asize && nsize - asize > record_size - used)) { + goto dirty_vol; + } + + nsize = QuadAlign(nsize); + + memmove(Add2Ptr(attr, nsize), Add2Ptr(attr, asize), + used - le16_to_cpu(lrh->record_off) - asize); + rec->used = cpu_to_le32(used + nsize - asize); + attr->size = cpu_to_le32(nsize); + memmove(Add2Ptr(attr, t16), data, dlen); + + if (run_get_highest_vcn(le64_to_cpu(attr->nres.svcn), + attr_run(attr), &t64)) { + goto dirty_vol; + } + + attr->nres.evcn = cpu_to_le64(t64); + oa2 = find_loaded_attr(log, attr, rno_base); + if (oa2 && oa2->attr->non_res) + oa2->attr->nres.evcn = attr->nres.evcn; + + mi->dirty = true; + break; + + case SetNewAttributeSizes: + new_sz = data; + + if (!check_if_attr(rec, lrh) || !attr->non_res) + goto dirty_vol; + + attr->nres.alloc_size = new_sz->alloc_size; + attr->nres.data_size = new_sz->data_size; + attr->nres.valid_size = new_sz->valid_size; + + if (dlen >= sizeof(struct NEW_ATTRIBUTE_SIZES)) + attr->nres.total_size = new_sz->total_size; + + oa2 = find_loaded_attr(log, attr, rno_base); + if (oa2) { + void *p2 = ntfs_memdup(attr, le32_to_cpu(attr->size)); + + if (p2) { + ntfs_free(oa2->attr); + oa2->attr = p2; + } + } + mi->dirty = true; + break; + + case AddIndexEntryRoot: + e = (struct NTFS_DE *)data; + esize = le16_to_cpu(e->size); + root = resident_data(attr); + hdr = &root->ihdr; + used = le32_to_cpu(hdr->used); + + if (!check_if_index_root(rec, lrh) || + !check_if_root_index(attr, hdr, lrh) || + Add2Ptr(data, esize) > Add2Ptr(lrh, rec_len) || + esize > le32_to_cpu(rec->total) - le32_to_cpu(rec->used)) { + goto dirty_vol; + } + + e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); + + change_attr_size(rec, attr, le32_to_cpu(attr->size) + esize); + + memmove(Add2Ptr(e1, esize), e1, + PtrOffset(e1, Add2Ptr(hdr, used))); + memmove(e1, e, esize); + + le32_add_cpu(&attr->res.data_size, esize); + hdr->used = cpu_to_le32(used + esize); + le32_add_cpu(&hdr->total, esize); + + mi->dirty = true; + break; + + case DeleteIndexEntryRoot: + root = resident_data(attr); + hdr = &root->ihdr; + used = le32_to_cpu(hdr->used); + + if (!check_if_index_root(rec, lrh) || + !check_if_root_index(attr, hdr, lrh)) { + goto dirty_vol; + } + + e1 = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); + esize = le16_to_cpu(e1->size); + e2 = Add2Ptr(e1, esize); + + memmove(e1, e2, PtrOffset(e2, Add2Ptr(hdr, used))); + + le32_sub_cpu(&attr->res.data_size, esize); + hdr->used = cpu_to_le32(used - esize); + le32_sub_cpu(&hdr->total, esize); + + change_attr_size(rec, attr, le32_to_cpu(attr->size) - esize); + + mi->dirty = true; + break; + + case SetIndexEntryVcnRoot: + root = resident_data(attr); + hdr = &root->ihdr; + + if (!check_if_index_root(rec, lrh) || + !check_if_root_index(attr, hdr, lrh)) { + goto dirty_vol; + } + + e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); + + de_set_vbn_le(e, *(__le64 *)data); + mi->dirty = true; + break; + + case UpdateFileNameRoot: + root = resident_data(attr); + hdr = &root->ihdr; + + if (!check_if_index_root(rec, lrh) || + !check_if_root_index(attr, hdr, lrh)) { + goto dirty_vol; + } + + e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); + fname = (struct ATTR_FILE_NAME *)(e + 1); + memmove(&fname->dup, data, sizeof(fname->dup)); // + mi->dirty = true; + break; + + case UpdateRecordDataRoot: + root = resident_data(attr); + hdr = &root->ihdr; + + if (!check_if_index_root(rec, lrh) || + !check_if_root_index(attr, hdr, lrh)) { + goto dirty_vol; + } + + e = Add2Ptr(attr, le16_to_cpu(lrh->attr_off)); + + memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen); + + mi->dirty = true; + break; + + case ZeroEndOfFileRecord: + t16 = le16_to_cpu(lrh->record_off); + if (t16 + dlen > record_size) + goto dirty_vol; + + memset(attr, 0, dlen); + mi->dirty = true; + break; + + case UpdateNonresidentValue: + t16 = le16_to_cpu(lrh->record_off); + + if (lco < cbo + t16 + dlen) + goto dirty_vol; + + memcpy(Add2Ptr(buffer_le, t16), data, dlen); + + a_dirty = true; + if (attr->type == ATTR_ALLOC) + ntfs_fix_pre_write(buffer_le, bytes); + break; + + case AddIndexEntryAllocation: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + e = data; + esize = le16_to_cpu(e->size); + t16 = le16_to_cpu(lrh->attr_off); + e1 = Add2Ptr(ib, t16); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + + used = le32_to_cpu(hdr->used); + + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16) || + Add2Ptr(e, esize) > Add2Ptr(lrh, rec_len) || + used + esize > le32_to_cpu(hdr->total)) { + goto dirty_vol; + } + + memmove(Add2Ptr(e1, esize), e1, + PtrOffset(e1, Add2Ptr(hdr, used))); + memcpy(e1, e, esize); + + hdr->used = cpu_to_le32(used + esize); + + a_dirty = true; + + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + case DeleteIndexEntryAllocation: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + t16 = le16_to_cpu(lrh->attr_off); + e = Add2Ptr(ib, t16); + esize = le16_to_cpu(e->size); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16)) { + goto dirty_vol; + } + + e1 = Add2Ptr(e, esize); + nsize = esize; + used = le32_to_cpu(hdr->used); + + memmove(e, e1, PtrOffset(e1, Add2Ptr(hdr, used))); + + hdr->used = cpu_to_le32(used - nsize); + + a_dirty = true; + + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + case WriteEndOfIndexBuffer: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + t16 = le16_to_cpu(lrh->attr_off); + e = Add2Ptr(ib, t16); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16) || + t16 + dlen > offsetof(struct INDEX_BUFFER, ihdr) + + le32_to_cpu(hdr->total)) { + goto dirty_vol; + } + + hdr->used = cpu_to_le32(dlen + PtrOffset(hdr, e)); + memmove(e, data, dlen); + + a_dirty = true; + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + case SetIndexEntryVcnAllocation: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + t16 = le16_to_cpu(lrh->attr_off); + e = Add2Ptr(ib, t16); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16)) { + goto dirty_vol; + } + + de_set_vbn_le(e, *(__le64 *)data); + + a_dirty = true; + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + case UpdateFileNameAllocation: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + t16 = le16_to_cpu(lrh->attr_off); + e = Add2Ptr(ib, t16); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16)) { + goto dirty_vol; + } + + fname = (struct ATTR_FILE_NAME *)(e + 1); + memmove(&fname->dup, data, sizeof(fname->dup)); + + a_dirty = true; + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + case SetBitsInNonresidentBitMap: + bmp_off = + le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); + bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); + t16 = le16_to_cpu(lrh->record_off); + + if (cbo + (bmp_off + 7) / 8 > lco || + cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) { + goto dirty_vol; + } + + __bitmap_set(Add2Ptr(buffer_le, t16), bmp_off, bmp_bits); + a_dirty = true; + break; + + case ClearBitsInNonresidentBitMap: + bmp_off = + le32_to_cpu(((struct BITMAP_RANGE *)data)->bitmap_off); + bmp_bits = le32_to_cpu(((struct BITMAP_RANGE *)data)->bits); + t16 = le16_to_cpu(lrh->record_off); + + if (cbo + (bmp_off + 7) / 8 > lco || + cbo + ((bmp_off + bmp_bits + 7) / 8) > lco) { + goto dirty_vol; + } + + __bitmap_clear(Add2Ptr(buffer_le, t16), bmp_off, bmp_bits); + a_dirty = true; + break; + + case UpdateRecordDataAllocation: + t16 = le16_to_cpu(lrh->record_off); + ib = Add2Ptr(buffer_le, t16); + hdr = &ib->ihdr; + t16 = le16_to_cpu(lrh->attr_off); + e = Add2Ptr(ib, t16); + + if (is_baad(&ib->rhdr)) + goto dirty_vol; + + if (!check_lsn(&ib->rhdr, rlsn)) + goto out; + if (!check_index_buffer(ib, bytes) || + !check_if_alloc_index(hdr, t16)) { + goto dirty_vol; + } + + memmove(Add2Ptr(e, le16_to_cpu(e->view.data_off)), data, dlen); + + a_dirty = true; + ntfs_fix_pre_write(&ib->rhdr, bytes); + break; + + default: + WARN_ON(1); + } + + if (rlsn) { + __le64 t64 = cpu_to_le64(*rlsn); + + if (rec) + rec->rhdr.lsn = t64; + if (ib) + ib->rhdr.lsn = t64; + } + + if (inode) { + err = _ni_write_inode(inode, 0); + } else if (mi && mi->dirty) { + err = mi_write(mi, 0); + if (err) + goto out; + } + + if (a_dirty) { + attr = oa->attr; + err = ntfs_sb_write_run(sbi, oa->run1, vbo, buffer_le, bytes); + if (err) + goto out; + } + +out: + + if (inode) + iput(inode); + else if (mi != mi2_child) + mi_put(mi); + + ntfs_free(buffer_le); + + return err; + +dirty_vol: + log->set_dirty = true; + goto out; +} + +/* + * log_replay + * + * this function is called during mount operation + * it replays log and empties it + * initialized is set false if logfile contains '-1' + */ +int log_replay(struct ntfs_inode *ni, bool *initialized) +{ + int err; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ntfs_log *log; + + struct restart_info rst_info, rst_info2; + u64 rec_lsn, ra_lsn, checkpt_lsn = 0, rlsn = 0; + struct ATTR_NAME_ENTRY *attr_names = NULL; + struct ATTR_NAME_ENTRY *ane; + struct RESTART_TABLE *dptbl = NULL; + struct RESTART_TABLE *trtbl = NULL; + const struct RESTART_TABLE *rt; + struct RESTART_TABLE *oatbl = NULL; + struct inode *inode; + struct OpenAttr *oa; + struct ntfs_inode *ni_oe; + struct ATTRIB *attr = NULL; + u64 size, vcn, undo_next_lsn; + CLST rno, lcn, lcn0, len0, clen; + void *data; + struct NTFS_RESTART *rst = NULL; + struct lcb *lcb = NULL; + struct OPEN_ATTR_ENRTY *oe; + struct TRANSACTION_ENTRY *tr; + struct DIR_PAGE_ENTRY *dp; + u32 i, bytes_per_attr_entry; + u32 l_size = ni->vfs_inode.i_size; + u32 orig_file_size = l_size; + u32 page_size, vbo, tail, off, dlen; + u32 saved_len, rec_len, transact_id; + bool use_second_page; + struct RESTART_AREA *ra2, *ra = NULL; + struct CLIENT_REC *ca, *cr; + __le16 client; + struct RESTART_HDR *rh; + const struct LFS_RECORD_HDR *frh; + const struct LOG_REC_HDR *lrh; + bool is_mapped; + bool is_ro = sb_rdonly(sbi->sb); + u64 t64; + u16 t16; + u32 t32; + + /* Get the size of page. NOTE: To replay we can use default page */ +#if PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <= DefaultLogPageSize * 2 + page_size = norm_file_page(PAGE_SIZE, &l_size, true); +#else + page_size = norm_file_page(PAGE_SIZE, &l_size, false); +#endif + if (!page_size) + return -EINVAL; + + log = ntfs_zalloc(sizeof(struct ntfs_log)); + if (!log) + return -ENOMEM; + + log->ni = ni; + log->l_size = l_size; + log->one_page_buf = ntfs_malloc(page_size); + + if (!log->one_page_buf) { + err = -ENOMEM; + goto out; + } + + log->page_size = page_size; + log->page_mask = page_size - 1; + log->page_bits = blksize_bits(page_size); + + /* Look for a restart area on the disk */ + err = log_read_rst(log, l_size, true, &rst_info); + if (err) + goto out; + + /* remember 'initialized' */ + *initialized = rst_info.initialized; + + if (!rst_info.restart) { + if (rst_info.initialized) { + /* no restart area but the file is not initialized */ + err = -EINVAL; + goto out; + } + + log_init_pg_hdr(log, page_size, page_size, 1, 1); + log_create(log, l_size, 0, get_random_int(), false, false); + + log->ra = ra; + + ra = log_create_ra(log); + if (!ra) { + err = -ENOMEM; + goto out; + } + log->ra = ra; + log->init_ra = true; + + goto process_log; + } + + /* + * If the restart offset above wasn't zero then we won't + * look for a second restart + */ + if (rst_info.vbo) + goto check_restart_area; + + err = log_read_rst(log, l_size, false, &rst_info2); + + /* Determine which restart area to use */ + if (!rst_info2.restart || rst_info2.last_lsn <= rst_info.last_lsn) + goto use_first_page; + + use_second_page = true; + + if (rst_info.chkdsk_was_run && page_size != rst_info.vbo) { + struct RECORD_PAGE_HDR *sp = NULL; + bool usa_error; + + if (!read_log_page(log, page_size, &sp, &usa_error) && + sp->rhdr.sign == NTFS_CHKD_SIGNATURE) { + use_second_page = false; + } + ntfs_free(sp); + } + + if (use_second_page) { + ntfs_free(rst_info.r_page); + memcpy(&rst_info, &rst_info2, sizeof(struct restart_info)); + rst_info2.r_page = NULL; + } + +use_first_page: + ntfs_free(rst_info2.r_page); + +check_restart_area: + /* If the restart area is at offset 0, we want to write the second restart area first */ + log->init_ra = !!rst_info.vbo; + + /* If we have a valid page then grab a pointer to the restart area */ + ra2 = rst_info.valid_page + ? Add2Ptr(rst_info.r_page, + le16_to_cpu(rst_info.r_page->ra_off)) + : NULL; + + if (rst_info.chkdsk_was_run || + (ra2 && ra2->client_idx[1] == LFS_NO_CLIENT_LE)) { + bool wrapped = false; + bool use_multi_page = false; + u32 open_log_count; + + /* Do some checks based on whether we have a valid log page */ + if (!rst_info.valid_page) { + open_log_count = get_random_int(); + goto init_log_instance; + } + open_log_count = le32_to_cpu(ra2->open_log_count); + + /* + * If the restart page size isn't changing then we want to + * check how much work we need to do + */ + if (page_size != le32_to_cpu(rst_info.r_page->sys_page_size)) + goto init_log_instance; + +init_log_instance: + log_init_pg_hdr(log, page_size, page_size, 1, 1); + + log_create(log, l_size, rst_info.last_lsn, open_log_count, + wrapped, use_multi_page); + + ra = log_create_ra(log); + if (!ra) { + err = -ENOMEM; + goto out; + } + log->ra = ra; + + /* Put the restart areas and initialize the log file as required */ + goto process_log; + } + + if (!ra2) { + err = -EINVAL; + goto out; + } + + /* + * If the log page or the system page sizes have changed, we can't use the log file + * We must use the system page size instead of the default size + * if there is not a clean shutdown + */ + t32 = le32_to_cpu(rst_info.r_page->sys_page_size); + if (page_size != t32) { + l_size = orig_file_size; + page_size = + norm_file_page(t32, &l_size, t32 == DefaultLogPageSize); + } + + if (page_size != t32 || + page_size != le32_to_cpu(rst_info.r_page->page_size)) { + err = -EINVAL; + goto out; + } + + /* If the file size has shrunk then we won't mount it */ + if (l_size < le64_to_cpu(ra2->l_size)) { + err = -EINVAL; + goto out; + } + + log_init_pg_hdr(log, page_size, page_size, + le16_to_cpu(rst_info.r_page->major_ver), + le16_to_cpu(rst_info.r_page->minor_ver)); + + log->l_size = le64_to_cpu(ra2->l_size); + log->seq_num_bits = le32_to_cpu(ra2->seq_num_bits); + log->file_data_bits = sizeof(u64) * 8 - log->seq_num_bits; + log->seq_num_mask = (8 << log->file_data_bits) - 1; + log->last_lsn = le64_to_cpu(ra2->current_lsn); + log->seq_num = log->last_lsn >> log->file_data_bits; + log->ra_off = le16_to_cpu(rst_info.r_page->ra_off); + log->restart_size = log->sys_page_size - log->ra_off; + log->record_header_len = le16_to_cpu(ra2->rec_hdr_len); + log->ra_size = le16_to_cpu(ra2->ra_len); + log->data_off = le16_to_cpu(ra2->data_off); + log->data_size = log->page_size - log->data_off; + log->reserved = log->data_size - log->record_header_len; + + vbo = lsn_to_vbo(log, log->last_lsn); + + if (vbo < log->first_page) { + /* This is a pseudo lsn */ + log->l_flags |= NTFSLOG_NO_LAST_LSN; + log->next_page = log->first_page; + goto find_oldest; + } + + /* Find the end of this log record */ + off = final_log_off(log, log->last_lsn, + le32_to_cpu(ra2->last_lsn_data_len)); + + /* If we wrapped the file then increment the sequence number */ + if (off <= vbo) { + log->seq_num += 1; + log->l_flags |= NTFSLOG_WRAPPED; + } + + /* Now compute the next log page to use */ + vbo &= ~log->sys_page_mask; + tail = log->page_size - (off & log->page_mask) - 1; + + /* If we can fit another log record on the page, move back a page the log file */ + if (tail >= log->record_header_len) { + log->l_flags |= NTFSLOG_REUSE_TAIL; + log->next_page = vbo; + } else { + log->next_page = next_page_off(log, vbo); + } + +find_oldest: + /* Find the oldest client lsn. Use the last flushed lsn as a starting point */ + log->oldest_lsn = log->last_lsn; + oldest_client_lsn(Add2Ptr(ra2, le16_to_cpu(ra2->client_off)), + ra2->client_idx[1], &log->oldest_lsn); + log->oldest_lsn_off = lsn_to_vbo(log, log->oldest_lsn); + + if (log->oldest_lsn_off < log->first_page) + log->l_flags |= NTFSLOG_NO_OLDEST_LSN; + + if (!(ra2->flags & RESTART_SINGLE_PAGE_IO)) + log->l_flags |= NTFSLOG_WRAPPED | NTFSLOG_MULTIPLE_PAGE_IO; + + log->current_openlog_count = le32_to_cpu(ra2->open_log_count); + log->total_avail_pages = log->l_size - log->first_page; + log->total_avail = log->total_avail_pages >> log->page_bits; + log->max_current_avail = log->total_avail * log->reserved; + log->total_avail = log->total_avail * log->data_size; + + log->current_avail = current_log_avail(log); + + ra = ntfs_zalloc(log->restart_size); + if (!ra) { + err = -ENOMEM; + goto out; + } + log->ra = ra; + + t16 = le16_to_cpu(ra2->client_off); + if (t16 == offsetof(struct RESTART_AREA, clients)) { + memcpy(ra, ra2, log->ra_size); + } else { + memcpy(ra, ra2, offsetof(struct RESTART_AREA, clients)); + memcpy(ra->clients, Add2Ptr(ra2, t16), + le16_to_cpu(ra2->ra_len) - t16); + + log->current_openlog_count = get_random_int(); + ra->open_log_count = cpu_to_le32(log->current_openlog_count); + log->ra_size = offsetof(struct RESTART_AREA, clients) + + sizeof(struct CLIENT_REC); + ra->client_off = + cpu_to_le16(offsetof(struct RESTART_AREA, clients)); + ra->ra_len = cpu_to_le16(log->ra_size); + } + + le32_add_cpu(&ra->open_log_count, 1); + + /* Now we need to walk through looking for the last lsn */ + err = last_log_lsn(log); + if (err) + goto out; + + log->current_avail = current_log_avail(log); + + /* Remember which restart area to write first */ + log->init_ra = rst_info.vbo; + +process_log: + /* 1.0, 1.1, 2.0 log->major_ver/minor_ver - short values */ + switch ((log->major_ver << 16) + log->minor_ver) { + case 0x10000: + case 0x10001: + case 0x20000: + break; + default: + ntfs_warn(sbi->sb, "\x24LogFile version %d.%d is not supported", + log->major_ver, log->minor_ver); + err = -EOPNOTSUPP; + log->set_dirty = true; + goto out; + } + + /* One client "NTFS" per logfile */ + ca = Add2Ptr(ra, le16_to_cpu(ra->client_off)); + + for (client = ra->client_idx[1];; client = cr->next_client) { + if (client == LFS_NO_CLIENT_LE) { + /* Insert "NTFS" client LogFile */ + client = ra->client_idx[0]; + if (client == LFS_NO_CLIENT_LE) + return -EINVAL; + + t16 = le16_to_cpu(client); + cr = ca + t16; + + remove_client(ca, cr, &ra->client_idx[0]); + + cr->restart_lsn = 0; + cr->oldest_lsn = cpu_to_le64(log->oldest_lsn); + cr->name_bytes = cpu_to_le32(8); + cr->name[0] = cpu_to_le16('N'); + cr->name[1] = cpu_to_le16('T'); + cr->name[2] = cpu_to_le16('F'); + cr->name[3] = cpu_to_le16('S'); + + add_client(ca, t16, &ra->client_idx[1]); + break; + } + + cr = ca + le16_to_cpu(client); + + if (cpu_to_le32(8) == cr->name_bytes && + cpu_to_le16('N') == cr->name[0] && + cpu_to_le16('T') == cr->name[1] && + cpu_to_le16('F') == cr->name[2] && + cpu_to_le16('S') == cr->name[3]) + break; + } + + /* Update the client handle with the client block information */ + log->client_id.seq_num = cr->seq_num; + log->client_id.client_idx = client; + + err = read_rst_area(log, &rst, &ra_lsn); + if (err) + goto out; + + if (!rst) + goto out; + + bytes_per_attr_entry = !rst->major_ver ? 0x2C : 0x28; + + checkpt_lsn = le64_to_cpu(rst->check_point_start); + if (!checkpt_lsn) + checkpt_lsn = ra_lsn; + + /* Allocate and Read the Transaction Table */ + if (!rst->transact_table_len) + goto check_dirty_page_table; + + t64 = le64_to_cpu(rst->transact_table_lsn); + err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); + if (err) + goto out; + + lrh = lcb->log_rec; + frh = lcb->lrh; + rec_len = le32_to_cpu(frh->client_data_len); + + if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id), + bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + t16 = le16_to_cpu(lrh->redo_off); + + rt = Add2Ptr(lrh, t16); + t32 = rec_len - t16; + + /* Now check that this is a valid restart table */ + if (!check_rstbl(rt, t32)) { + err = -EINVAL; + goto out; + } + + trtbl = ntfs_memdup(rt, t32); + if (!trtbl) { + err = -ENOMEM; + goto out; + } + + lcb_put(lcb); + lcb = NULL; + +check_dirty_page_table: + /* The next record back should be the Dirty Pages Table */ + if (!rst->dirty_pages_len) + goto check_attribute_names; + + t64 = le64_to_cpu(rst->dirty_pages_table_lsn); + err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); + if (err) + goto out; + + lrh = lcb->log_rec; + frh = lcb->lrh; + rec_len = le32_to_cpu(frh->client_data_len); + + if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id), + bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + t16 = le16_to_cpu(lrh->redo_off); + + rt = Add2Ptr(lrh, t16); + t32 = rec_len - t16; + + /* Now check that this is a valid restart table */ + if (!check_rstbl(rt, t32)) { + err = -EINVAL; + goto out; + } + + dptbl = ntfs_memdup(rt, t32); + if (!dptbl) { + err = -ENOMEM; + goto out; + } + + /* Convert Ra version '0' into version '1' */ + if (rst->major_ver) + goto end_conv_1; + + dp = NULL; + while ((dp = enum_rstbl(dptbl, dp))) { + struct DIR_PAGE_ENTRY_32 *dp0 = (struct DIR_PAGE_ENTRY_32 *)dp; + // NOTE: Danger. Check for of boundary + memmove(&dp->vcn, &dp0->vcn_low, + 2 * sizeof(u64) + + le32_to_cpu(dp->lcns_follow) * sizeof(u64)); + } + +end_conv_1: + lcb_put(lcb); + lcb = NULL; + + /* Go through the table and remove the duplicates, remembering the oldest lsn values */ + if (sbi->cluster_size <= log->page_size) + goto trace_dp_table; + + dp = NULL; + while ((dp = enum_rstbl(dptbl, dp))) { + struct DIR_PAGE_ENTRY *next = dp; + + while ((next = enum_rstbl(dptbl, next))) { + if (next->target_attr == dp->target_attr && + next->vcn == dp->vcn) { + if (le64_to_cpu(next->oldest_lsn) < + le64_to_cpu(dp->oldest_lsn)) { + dp->oldest_lsn = next->oldest_lsn; + } + + free_rsttbl_idx(dptbl, PtrOffset(dptbl, next)); + } + } + } +trace_dp_table: +check_attribute_names: + /* The next record should be the Attribute Names */ + if (!rst->attr_names_len) + goto check_attr_table; + + t64 = le64_to_cpu(rst->attr_names_lsn); + err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); + if (err) + goto out; + + lrh = lcb->log_rec; + frh = lcb->lrh; + rec_len = le32_to_cpu(frh->client_data_len); + + if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id), + bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + t32 = lrh_length(lrh); + rec_len -= t32; + + attr_names = ntfs_memdup(Add2Ptr(lrh, t32), rec_len); + + lcb_put(lcb); + lcb = NULL; + +check_attr_table: + /* The next record should be the attribute Table */ + if (!rst->open_attr_len) + goto check_attribute_names2; + + t64 = le64_to_cpu(rst->open_attr_table_lsn); + err = read_log_rec_lcb(log, t64, lcb_ctx_prev, &lcb); + if (err) + goto out; + + lrh = lcb->log_rec; + frh = lcb->lrh; + rec_len = le32_to_cpu(frh->client_data_len); + + if (!check_log_rec(lrh, rec_len, le32_to_cpu(frh->transact_id), + bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + t16 = le16_to_cpu(lrh->redo_off); + + rt = Add2Ptr(lrh, t16); + t32 = rec_len - t16; + + if (!check_rstbl(rt, t32)) { + err = -EINVAL; + goto out; + } + + oatbl = ntfs_memdup(rt, t32); + if (!oatbl) { + err = -ENOMEM; + goto out; + } + + log->open_attr_tbl = oatbl; + + /* Clear all of the Attr pointers */ + oe = NULL; + while ((oe = enum_rstbl(oatbl, oe))) { + if (!rst->major_ver) { + struct OPEN_ATTR_ENRTY_32 oe0; + + /* Really 'oe' points to OPEN_ATTR_ENRTY_32 */ + memcpy(&oe0, oe, SIZEOF_OPENATTRIBUTEENTRY0); + + oe->bytes_per_index = oe0.bytes_per_index; + oe->type = oe0.type; + oe->is_dirty_pages = oe0.is_dirty_pages; + oe->name_len = 0; + oe->ref = oe0.ref; + oe->open_record_lsn = oe0.open_record_lsn; + } + + oe->is_attr_name = 0; + oe->ptr = NULL; + } + + lcb_put(lcb); + lcb = NULL; + +check_attribute_names2: + if (!rst->attr_names_len) + goto trace_attribute_table; + + ane = attr_names; + if (!oatbl) + goto trace_attribute_table; + while (ane->off) { + /* TODO: Clear table on exit! */ + oe = Add2Ptr(oatbl, le16_to_cpu(ane->off)); + t16 = le16_to_cpu(ane->name_bytes); + oe->name_len = t16 / sizeof(short); + oe->ptr = ane->name; + oe->is_attr_name = 2; + ane = Add2Ptr(ane, sizeof(struct ATTR_NAME_ENTRY) + t16); + } + +trace_attribute_table: + /* + * If the checkpt_lsn is zero, then this is a freshly + * formatted disk and we have no work to do + */ + if (!checkpt_lsn) { + err = 0; + goto out; + } + + if (!oatbl) { + oatbl = init_rsttbl(bytes_per_attr_entry, 8); + if (!oatbl) { + err = -ENOMEM; + goto out; + } + } + + log->open_attr_tbl = oatbl; + + /* Start the analysis pass from the Checkpoint lsn. */ + rec_lsn = checkpt_lsn; + + /* Read the first lsn */ + err = read_log_rec_lcb(log, checkpt_lsn, lcb_ctx_next, &lcb); + if (err) + goto out; + + /* Loop to read all subsequent records to the end of the log file */ +next_log_record_analyze: + err = read_next_log_rec(log, lcb, &rec_lsn); + if (err) + goto out; + + if (!rec_lsn) + goto end_log_records_enumerate; + + frh = lcb->lrh; + transact_id = le32_to_cpu(frh->transact_id); + rec_len = le32_to_cpu(frh->client_data_len); + lrh = lcb->log_rec; + + if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + /* + * The first lsn after the previous lsn remembered + * the checkpoint is the first candidate for the rlsn + */ + if (!rlsn) + rlsn = rec_lsn; + + if (LfsClientRecord != frh->record_type) + goto next_log_record_analyze; + + /* + * Now update the Transaction Table for this transaction + * If there is no entry present or it is unallocated we allocate the entry + */ + if (!trtbl) { + trtbl = init_rsttbl(sizeof(struct TRANSACTION_ENTRY), + INITIAL_NUMBER_TRANSACTIONS); + if (!trtbl) { + err = -ENOMEM; + goto out; + } + } + + tr = Add2Ptr(trtbl, transact_id); + + if (transact_id >= bytes_per_rt(trtbl) || + tr->next != RESTART_ENTRY_ALLOCATED_LE) { + tr = alloc_rsttbl_from_idx(&trtbl, transact_id); + if (!tr) { + err = -ENOMEM; + goto out; + } + tr->transact_state = TransactionActive; + tr->first_lsn = cpu_to_le64(rec_lsn); + } + + tr->prev_lsn = tr->undo_next_lsn = cpu_to_le64(rec_lsn); + + /* + * If this is a compensation log record, then change + * the undo_next_lsn to be the undo_next_lsn of this record + */ + if (lrh->undo_op == cpu_to_le16(CompensationLogRecord)) + tr->undo_next_lsn = frh->client_undo_next_lsn; + + /* Dispatch to handle log record depending on type */ + switch (le16_to_cpu(lrh->redo_op)) { + case InitializeFileRecordSegment: + case DeallocateFileRecordSegment: + case WriteEndOfFileRecordSegment: + case CreateAttribute: + case DeleteAttribute: + case UpdateResidentValue: + case UpdateNonresidentValue: + case UpdateMappingPairs: + case SetNewAttributeSizes: + case AddIndexEntryRoot: + case DeleteIndexEntryRoot: + case AddIndexEntryAllocation: + case DeleteIndexEntryAllocation: + case WriteEndOfIndexBuffer: + case SetIndexEntryVcnRoot: + case SetIndexEntryVcnAllocation: + case UpdateFileNameRoot: + case UpdateFileNameAllocation: + case SetBitsInNonresidentBitMap: + case ClearBitsInNonresidentBitMap: + case UpdateRecordDataRoot: + case UpdateRecordDataAllocation: + case ZeroEndOfFileRecord: + t16 = le16_to_cpu(lrh->target_attr); + t64 = le64_to_cpu(lrh->target_vcn); + dp = find_dp(dptbl, t16, t64); + + if (dp) + goto copy_lcns; + + /* + * Calculate the number of clusters per page the system + * which wrote the checkpoint, possibly creating the table + */ + if (dptbl) { + t32 = 1 + (le16_to_cpu(dptbl->size) - + sizeof(struct DIR_PAGE_ENTRY)) / + sizeof(u64); + } else { + t32 = log->clst_per_page; + ntfs_free(dptbl); + dptbl = init_rsttbl(sizeof(struct DIR_PAGE_ENTRY) + + (t32 - 1) * sizeof(u64), + 32); + if (!dptbl) { + err = -ENOMEM; + goto out; + } + } + + dp = alloc_rsttbl_idx(&dptbl); + dp->target_attr = cpu_to_le32(t16); + dp->transfer_len = cpu_to_le32(t32 << sbi->cluster_bits); + dp->lcns_follow = cpu_to_le32(t32); + dp->vcn = cpu_to_le64(t64 & ~((u64)t32 - 1)); + dp->oldest_lsn = cpu_to_le64(rec_lsn); + +copy_lcns: + /* + * Copy the Lcns from the log record into the Dirty Page Entry + * TODO: for different page size support, must somehow make + * whole routine a loop, case Lcns do not fit below + */ + t16 = le16_to_cpu(lrh->lcns_follow); + for (i = 0; i < t16; i++) { + size_t j = (size_t)(le64_to_cpu(lrh->target_vcn) - + le64_to_cpu(dp->vcn)); + dp->page_lcns[j + i] = lrh->page_lcns[i]; + } + + goto next_log_record_analyze; + + case DeleteDirtyClusters: { + u32 range_count = + le16_to_cpu(lrh->redo_len) / sizeof(struct LCN_RANGE); + const struct LCN_RANGE *r = + Add2Ptr(lrh, le16_to_cpu(lrh->redo_off)); + + /* Loop through all of the Lcn ranges this log record */ + for (i = 0; i < range_count; i++, r++) { + u64 lcn0 = le64_to_cpu(r->lcn); + u64 lcn_e = lcn0 + le64_to_cpu(r->len) - 1; + + dp = NULL; + while ((dp = enum_rstbl(dptbl, dp))) { + u32 j; + + t32 = le32_to_cpu(dp->lcns_follow); + for (j = 0; j < t32; j++) { + t64 = le64_to_cpu(dp->page_lcns[j]); + if (t64 >= lcn0 && t64 <= lcn_e) + dp->page_lcns[j] = 0; + } + } + } + goto next_log_record_analyze; + ; + } + + case OpenNonresidentAttribute: + t16 = le16_to_cpu(lrh->target_attr); + if (t16 >= bytes_per_rt(oatbl)) { + /* + * Compute how big the table needs to be. + * Add 10 extra entries for some cushion + */ + u32 new_e = t16 / le16_to_cpu(oatbl->size); + + new_e += 10 - le16_to_cpu(oatbl->used); + + oatbl = extend_rsttbl(oatbl, new_e, ~0u); + log->open_attr_tbl = oatbl; + if (!oatbl) { + err = -ENOMEM; + goto out; + } + } + + /* Point to the entry being opened */ + oe = alloc_rsttbl_from_idx(&oatbl, t16); + log->open_attr_tbl = oatbl; + if (!oe) { + err = -ENOMEM; + goto out; + } + + /* Initialize this entry from the log record */ + t16 = le16_to_cpu(lrh->redo_off); + if (!rst->major_ver) { + /* Convert version '0' into version '1' */ + struct OPEN_ATTR_ENRTY_32 *oe0 = Add2Ptr(lrh, t16); + + oe->bytes_per_index = oe0->bytes_per_index; + oe->type = oe0->type; + oe->is_dirty_pages = oe0->is_dirty_pages; + oe->name_len = 0; //oe0.name_len; + oe->ref = oe0->ref; + oe->open_record_lsn = oe0->open_record_lsn; + } else { + memcpy(oe, Add2Ptr(lrh, t16), bytes_per_attr_entry); + } + + t16 = le16_to_cpu(lrh->undo_len); + if (t16) { + oe->ptr = ntfs_malloc(t16); + if (!oe->ptr) { + err = -ENOMEM; + goto out; + } + oe->name_len = t16 / sizeof(short); + memcpy(oe->ptr, + Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)), t16); + oe->is_attr_name = 1; + } else { + oe->ptr = NULL; + oe->is_attr_name = 0; + } + + goto next_log_record_analyze; + + case HotFix: + t16 = le16_to_cpu(lrh->target_attr); + t64 = le64_to_cpu(lrh->target_vcn); + dp = find_dp(dptbl, t16, t64); + if (dp) { + size_t j = le64_to_cpu(lrh->target_vcn) - + le64_to_cpu(dp->vcn); + if (dp->page_lcns[j]) + dp->page_lcns[j] = lrh->page_lcns[0]; + } + goto next_log_record_analyze; + + case EndTopLevelAction: + tr = Add2Ptr(trtbl, transact_id); + tr->prev_lsn = cpu_to_le64(rec_lsn); + tr->undo_next_lsn = frh->client_undo_next_lsn; + goto next_log_record_analyze; + + case PrepareTransaction: + tr = Add2Ptr(trtbl, transact_id); + tr->transact_state = TransactionPrepared; + goto next_log_record_analyze; + + case CommitTransaction: + tr = Add2Ptr(trtbl, transact_id); + tr->transact_state = TransactionCommitted; + goto next_log_record_analyze; + + case ForgetTransaction: + free_rsttbl_idx(trtbl, transact_id); + goto next_log_record_analyze; + + case Noop: + case OpenAttributeTableDump: + case AttributeNamesDump: + case DirtyPageTableDump: + case TransactionTableDump: + /* The following cases require no action the Analysis Pass */ + goto next_log_record_analyze; + + default: + /* + * All codes will be explicitly handled. + * If we see a code we do not expect, then we are trouble + */ + goto next_log_record_analyze; + } + +end_log_records_enumerate: + lcb_put(lcb); + lcb = NULL; + + /* + * Scan the Dirty Page Table and Transaction Table for + * the lowest lsn, and return it as the Redo lsn + */ + dp = NULL; + while ((dp = enum_rstbl(dptbl, dp))) { + t64 = le64_to_cpu(dp->oldest_lsn); + if (t64 && t64 < rlsn) + rlsn = t64; + } + + tr = NULL; + while ((tr = enum_rstbl(trtbl, tr))) { + t64 = le64_to_cpu(tr->first_lsn); + if (t64 && t64 < rlsn) + rlsn = t64; + } + + /* Only proceed if the Dirty Page Table or Transaction table are not empty */ + if ((!dptbl || !dptbl->total) && (!trtbl || !trtbl->total)) + goto end_reply; + + sbi->flags |= NTFS_FLAGS_NEED_REPLAY; + if (is_ro) + goto out; + + /* Reopen all of the attributes with dirty pages */ + oe = NULL; +next_open_attribute: + + oe = enum_rstbl(oatbl, oe); + if (!oe) { + err = 0; + dp = NULL; + goto next_dirty_page; + } + + oa = ntfs_zalloc(sizeof(struct OpenAttr)); + if (!oa) { + err = -ENOMEM; + goto out; + } + + inode = ntfs_iget5(sbi->sb, &oe->ref, NULL); + if (IS_ERR(inode)) + goto fake_attr; + + if (is_bad_inode(inode)) { + iput(inode); +fake_attr: + if (oa->ni) { + iput(&oa->ni->vfs_inode); + oa->ni = NULL; + } + + attr = attr_create_nonres_log(sbi, oe->type, 0, oe->ptr, + oe->name_len, 0); + if (!attr) { + ntfs_free(oa); + err = -ENOMEM; + goto out; + } + oa->attr = attr; + oa->run1 = &oa->run0; + goto final_oe; + } + + ni_oe = ntfs_i(inode); + oa->ni = ni_oe; + + attr = ni_find_attr(ni_oe, NULL, NULL, oe->type, oe->ptr, oe->name_len, + NULL, NULL); + + if (!attr) + goto fake_attr; + + t32 = le32_to_cpu(attr->size); + oa->attr = ntfs_memdup(attr, t32); + if (!oa->attr) + goto fake_attr; + + if (!S_ISDIR(inode->i_mode)) { + if (attr->type == ATTR_DATA && !attr->name_len) { + oa->run1 = &ni_oe->file.run; + goto final_oe; + } + } else { + if (attr->type == ATTR_ALLOC && + attr->name_len == ARRAY_SIZE(I30_NAME) && + !memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) { + oa->run1 = &ni_oe->dir.alloc_run; + goto final_oe; + } + } + + if (attr->non_res) { + u16 roff = le16_to_cpu(attr->nres.run_off); + CLST svcn = le64_to_cpu(attr->nres.svcn); + + err = run_unpack(&oa->run0, sbi, inode->i_ino, svcn, + le64_to_cpu(attr->nres.evcn), svcn, + Add2Ptr(attr, roff), t32 - roff); + if (err < 0) { + ntfs_free(oa->attr); + oa->attr = NULL; + goto fake_attr; + } + err = 0; + } + oa->run1 = &oa->run0; + attr = oa->attr; + +final_oe: + if (oe->is_attr_name == 1) + ntfs_free(oe->ptr); + oe->is_attr_name = 0; + oe->ptr = oa; + oe->name_len = attr->name_len; + + goto next_open_attribute; + + /* + * Now loop through the dirty page table to extract all of the Vcn/Lcn + * Mapping that we have, and insert it into the appropriate run + */ +next_dirty_page: + dp = enum_rstbl(dptbl, dp); + if (!dp) + goto do_redo_1; + + oe = Add2Ptr(oatbl, le32_to_cpu(dp->target_attr)); + + if (oe->next != RESTART_ENTRY_ALLOCATED_LE) + goto next_dirty_page; + + oa = oe->ptr; + if (!oa) + goto next_dirty_page; + + i = -1; +next_dirty_page_vcn: + i += 1; + if (i >= le32_to_cpu(dp->lcns_follow)) + goto next_dirty_page; + + vcn = le64_to_cpu(dp->vcn) + i; + size = (vcn + 1) << sbi->cluster_bits; + + if (!dp->page_lcns[i]) + goto next_dirty_page_vcn; + + rno = ino_get(&oe->ref); + if (rno <= MFT_REC_MIRR && + size < (MFT_REC_VOL + 1) * sbi->record_size && + oe->type == ATTR_DATA) { + goto next_dirty_page_vcn; + } + + lcn = le64_to_cpu(dp->page_lcns[i]); + + if ((!run_lookup_entry(oa->run1, vcn, &lcn0, &len0, NULL) || + lcn0 != lcn) && + !run_add_entry(oa->run1, vcn, lcn, 1, false)) { + err = -ENOMEM; + goto out; + } + attr = oa->attr; + t64 = le64_to_cpu(attr->nres.alloc_size); + if (size > t64) { + attr->nres.valid_size = attr->nres.data_size = + attr->nres.alloc_size = cpu_to_le64(size); + } + goto next_dirty_page_vcn; + +do_redo_1: + /* + * Perform the Redo Pass, to restore all of the dirty pages to the same + * contents that they had immediately before the crash + * If the dirty page table is empty, then we can skip the entire Redo Pass + */ + if (!dptbl || !dptbl->total) + goto do_undo_action; + + rec_lsn = rlsn; + + /* + * Read the record at the Redo lsn, before falling + * into common code to handle each record + */ + err = read_log_rec_lcb(log, rlsn, lcb_ctx_next, &lcb); + if (err) + goto out; + + /* + * Now loop to read all of our log records forwards, + * until we hit the end of the file, cleaning up at the end + */ +do_action_next: + frh = lcb->lrh; + + if (LfsClientRecord != frh->record_type) + goto read_next_log_do_action; + + transact_id = le32_to_cpu(frh->transact_id); + rec_len = le32_to_cpu(frh->client_data_len); + lrh = lcb->log_rec; + + if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + /* Ignore log records that do not update pages */ + if (lrh->lcns_follow) + goto find_dirty_page; + + goto read_next_log_do_action; + +find_dirty_page: + t16 = le16_to_cpu(lrh->target_attr); + t64 = le64_to_cpu(lrh->target_vcn); + dp = find_dp(dptbl, t16, t64); + + if (!dp) + goto read_next_log_do_action; + + if (rec_lsn < le64_to_cpu(dp->oldest_lsn)) + goto read_next_log_do_action; + + t16 = le16_to_cpu(lrh->target_attr); + if (t16 >= bytes_per_rt(oatbl)) { + err = -EINVAL; + goto out; + } + + oe = Add2Ptr(oatbl, t16); + + if (oe->next != RESTART_ENTRY_ALLOCATED_LE) { + err = -EINVAL; + goto out; + } + + oa = oe->ptr; + + if (!oa) { + err = -EINVAL; + goto out; + } + attr = oa->attr; + + vcn = le64_to_cpu(lrh->target_vcn); + + if (!run_lookup_entry(oa->run1, vcn, &lcn, NULL, NULL) || + lcn == SPARSE_LCN) { + goto read_next_log_do_action; + } + + /* Point to the Redo data and get its length */ + data = Add2Ptr(lrh, le16_to_cpu(lrh->redo_off)); + dlen = le16_to_cpu(lrh->redo_len); + + /* Shorten length by any Lcns which were deleted */ + saved_len = dlen; + + for (i = le16_to_cpu(lrh->lcns_follow); i; i--) { + size_t j; + u32 alen, voff; + + voff = le16_to_cpu(lrh->record_off) + + le16_to_cpu(lrh->attr_off); + voff += le16_to_cpu(lrh->cluster_off) << SECTOR_SHIFT; + + /* If the Vcn question is allocated, we can just get out.*/ + j = le64_to_cpu(lrh->target_vcn) - le64_to_cpu(dp->vcn); + if (dp->page_lcns[j + i - 1]) + break; + + if (!saved_len) + saved_len = 1; + + /* + * Calculate the allocated space left relative to the + * log record Vcn, after removing this unallocated Vcn + */ + alen = (i - 1) << sbi->cluster_bits; + + /* + * If the update described this log record goes beyond + * the allocated space, then we will have to reduce the length + */ + if (voff >= alen) + dlen = 0; + else if (voff + dlen > alen) + dlen = alen - voff; + } + + /* If the resulting dlen from above is now zero, we can skip this log record */ + if (!dlen && saved_len) + goto read_next_log_do_action; + + t16 = le16_to_cpu(lrh->redo_op); + if (can_skip_action(t16)) + goto read_next_log_do_action; + + /* Apply the Redo operation a common routine */ + err = do_action(log, oe, lrh, t16, data, dlen, rec_len, &rec_lsn); + if (err) + goto out; + + /* Keep reading and looping back until end of file */ +read_next_log_do_action: + err = read_next_log_rec(log, lcb, &rec_lsn); + if (!err && rec_lsn) + goto do_action_next; + + lcb_put(lcb); + lcb = NULL; + +do_undo_action: + /* Scan Transaction Table */ + tr = NULL; +transaction_table_next: + tr = enum_rstbl(trtbl, tr); + if (!tr) + goto undo_action_done; + + if (TransactionActive != tr->transact_state || !tr->undo_next_lsn) { + free_rsttbl_idx(trtbl, PtrOffset(trtbl, tr)); + goto transaction_table_next; + } + + log->transaction_id = PtrOffset(trtbl, tr); + undo_next_lsn = le64_to_cpu(tr->undo_next_lsn); + + /* + * We only have to do anything if the transaction has + * something its undo_next_lsn field + */ + if (!undo_next_lsn) + goto commit_undo; + + /* Read the first record to be undone by this transaction */ + err = read_log_rec_lcb(log, undo_next_lsn, lcb_ctx_undo_next, &lcb); + if (err) + goto out; + + /* + * Now loop to read all of our log records forwards, + * until we hit the end of the file, cleaning up at the end + */ +undo_action_next: + + lrh = lcb->log_rec; + frh = lcb->lrh; + transact_id = le32_to_cpu(frh->transact_id); + rec_len = le32_to_cpu(frh->client_data_len); + + if (!check_log_rec(lrh, rec_len, transact_id, bytes_per_attr_entry)) { + err = -EINVAL; + goto out; + } + + if (lrh->undo_op == cpu_to_le16(Noop)) + goto read_next_log_undo_action; + + oe = Add2Ptr(oatbl, le16_to_cpu(lrh->target_attr)); + oa = oe->ptr; + + t16 = le16_to_cpu(lrh->lcns_follow); + if (!t16) + goto add_allocated_vcns; + + is_mapped = run_lookup_entry(oa->run1, le64_to_cpu(lrh->target_vcn), + &lcn, &clen, NULL); + + /* + * If the mapping isn't already the table or the mapping + * corresponds to a hole the mapping, we need to make sure + * there is no partial page already memory + */ + if (is_mapped && lcn != SPARSE_LCN && clen >= t16) + goto add_allocated_vcns; + + vcn = le64_to_cpu(lrh->target_vcn); + vcn &= ~(log->clst_per_page - 1); + +add_allocated_vcns: + for (i = 0, vcn = le64_to_cpu(lrh->target_vcn), + size = (vcn + 1) << sbi->cluster_bits; + i < t16; i++, vcn += 1, size += sbi->cluster_size) { + attr = oa->attr; + if (!attr->non_res) { + if (size > le32_to_cpu(attr->res.data_size)) + attr->res.data_size = cpu_to_le32(size); + } else { + if (size > le64_to_cpu(attr->nres.data_size)) + attr->nres.valid_size = attr->nres.data_size = + attr->nres.alloc_size = + cpu_to_le64(size); + } + } + + t16 = le16_to_cpu(lrh->undo_op); + if (can_skip_action(t16)) + goto read_next_log_undo_action; + + /* Point to the Redo data and get its length */ + data = Add2Ptr(lrh, le16_to_cpu(lrh->undo_off)); + dlen = le16_to_cpu(lrh->undo_len); + + /* it is time to apply the undo action */ + err = do_action(log, oe, lrh, t16, data, dlen, rec_len, NULL); + +read_next_log_undo_action: + /* + * Keep reading and looping back until we have read the + * last record for this transaction + */ + err = read_next_log_rec(log, lcb, &rec_lsn); + if (err) + goto out; + + if (rec_lsn) + goto undo_action_next; + +commit_undo: + free_rsttbl_idx(trtbl, log->transaction_id); + + log->transaction_id = 0; + + goto transaction_table_next; + +undo_action_done: + + ntfs_update_mftmirr(sbi, 0); + + sbi->flags &= ~NTFS_FLAGS_NEED_REPLAY; + +end_reply: + + err = 0; + if (is_ro) + goto out; + + rh = ntfs_zalloc(log->page_size); + if (!rh) { + err = -ENOMEM; + goto out; + } + + rh->rhdr.sign = NTFS_RSTR_SIGNATURE; + rh->rhdr.fix_off = cpu_to_le16(offsetof(struct RESTART_HDR, fixups)); + t16 = (log->page_size >> SECTOR_SHIFT) + 1; + rh->rhdr.fix_num = cpu_to_le16(t16); + rh->sys_page_size = cpu_to_le32(log->page_size); + rh->page_size = cpu_to_le32(log->page_size); + + t16 = QuadAlign(offsetof(struct RESTART_HDR, fixups) + + sizeof(short) * t16); + rh->ra_off = cpu_to_le16(t16); + rh->minor_ver = cpu_to_le16(1); // 0x1A: + rh->major_ver = cpu_to_le16(1); // 0x1C: + + ra2 = Add2Ptr(rh, t16); + memcpy(ra2, ra, sizeof(struct RESTART_AREA)); + + ra2->client_idx[0] = 0; + ra2->client_idx[1] = LFS_NO_CLIENT_LE; + ra2->flags = cpu_to_le16(2); + + le32_add_cpu(&ra2->open_log_count, 1); + + ntfs_fix_pre_write(&rh->rhdr, log->page_size); + + err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rh, log->page_size); + if (!err) + err = ntfs_sb_write_run(sbi, &log->ni->file.run, log->page_size, + rh, log->page_size); + + ntfs_free(rh); + if (err) + goto out; + +out: + ntfs_free(rst); + if (lcb) + lcb_put(lcb); + + /* Scan the Open Attribute Table to close all of the open attributes */ + oe = NULL; + while ((oe = enum_rstbl(oatbl, oe))) { + rno = ino_get(&oe->ref); + + if (oe->is_attr_name == 1) { + ntfs_free(oe->ptr); + oe->ptr = NULL; + continue; + } + + if (oe->is_attr_name) + continue; + + oa = oe->ptr; + if (!oa) + continue; + + run_close(&oa->run0); + ntfs_free(oa->attr); + if (oa->ni) + iput(&oa->ni->vfs_inode); + ntfs_free(oa); + } + + ntfs_free(trtbl); + ntfs_free(oatbl); + ntfs_free(dptbl); + ntfs_free(attr_names); + ntfs_free(rst_info.r_page); + + ntfs_free(ra); + ntfs_free(log->one_page_buf); + + if (err) + sbi->flags |= NTFS_FLAGS_NEED_REPLAY; + + if (err == -EROFS) + err = 0; + else if (log->set_dirty) + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + + ntfs_free(log); + + return err; +} diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c new file mode 100644 index 000000000000..f1d7cea2ca22 --- /dev/null +++ b/fs/ntfs3/fsntfs.c @@ -0,0 +1,2543 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +// clang-format off +const struct cpu_str NAME_MFT = { + 4, 0, { '$', 'M', 'F', 'T' }, +}; +const struct cpu_str NAME_MIRROR = { + 8, 0, { '$', 'M', 'F', 'T', 'M', 'i', 'r', 'r' }, +}; +const struct cpu_str NAME_LOGFILE = { + 8, 0, { '$', 'L', 'o', 'g', 'F', 'i', 'l', 'e' }, +}; +const struct cpu_str NAME_VOLUME = { + 7, 0, { '$', 'V', 'o', 'l', 'u', 'm', 'e' }, +}; +const struct cpu_str NAME_ATTRDEF = { + 8, 0, { '$', 'A', 't', 't', 'r', 'D', 'e', 'f' }, +}; +const struct cpu_str NAME_ROOT = { + 1, 0, { '.' }, +}; +const struct cpu_str NAME_BITMAP = { + 7, 0, { '$', 'B', 'i', 't', 'm', 'a', 'p' }, +}; +const struct cpu_str NAME_BOOT = { + 5, 0, { '$', 'B', 'o', 'o', 't' }, +}; +const struct cpu_str NAME_BADCLUS = { + 8, 0, { '$', 'B', 'a', 'd', 'C', 'l', 'u', 's' }, +}; +const struct cpu_str NAME_QUOTA = { + 6, 0, { '$', 'Q', 'u', 'o', 't', 'a' }, +}; +const struct cpu_str NAME_SECURE = { + 7, 0, { '$', 'S', 'e', 'c', 'u', 'r', 'e' }, +}; +const struct cpu_str NAME_UPCASE = { + 7, 0, { '$', 'U', 'p', 'C', 'a', 's', 'e' }, +}; +const struct cpu_str NAME_EXTEND = { + 7, 0, { '$', 'E', 'x', 't', 'e', 'n', 'd' }, +}; +const struct cpu_str NAME_OBJID = { + 6, 0, { '$', 'O', 'b', 'j', 'I', 'd' }, +}; +const struct cpu_str NAME_REPARSE = { + 8, 0, { '$', 'R', 'e', 'p', 'a', 'r', 's', 'e' }, +}; +const struct cpu_str NAME_USNJRNL = { + 8, 0, { '$', 'U', 's', 'n', 'J', 'r', 'n', 'l' }, +}; +const __le16 BAD_NAME[4] = { + cpu_to_le16('$'), cpu_to_le16('B'), cpu_to_le16('a'), cpu_to_le16('d'), +}; +const __le16 I30_NAME[4] = { + cpu_to_le16('$'), cpu_to_le16('I'), cpu_to_le16('3'), cpu_to_le16('0'), +}; +const __le16 SII_NAME[4] = { + cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('I'), cpu_to_le16('I'), +}; +const __le16 SDH_NAME[4] = { + cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('H'), +}; +const __le16 SDS_NAME[4] = { + cpu_to_le16('$'), cpu_to_le16('S'), cpu_to_le16('D'), cpu_to_le16('S'), +}; +const __le16 SO_NAME[2] = { + cpu_to_le16('$'), cpu_to_le16('O'), +}; +const __le16 SQ_NAME[2] = { + cpu_to_le16('$'), cpu_to_le16('Q'), +}; +const __le16 SR_NAME[2] = { + cpu_to_le16('$'), cpu_to_le16('R'), +}; + +#ifdef CONFIG_NTFS3_LZX_XPRESS +const __le16 WOF_NAME[17] = { + cpu_to_le16('W'), cpu_to_le16('o'), cpu_to_le16('f'), cpu_to_le16('C'), + cpu_to_le16('o'), cpu_to_le16('m'), cpu_to_le16('p'), cpu_to_le16('r'), + cpu_to_le16('e'), cpu_to_le16('s'), cpu_to_le16('s'), cpu_to_le16('e'), + cpu_to_le16('d'), cpu_to_le16('D'), cpu_to_le16('a'), cpu_to_le16('t'), + cpu_to_le16('a'), +}; +#endif + +// clang-format on + +/* + * ntfs_fix_pre_write + * + * inserts fixups into 'rhdr' before writing to disk + */ +bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes) +{ + u16 *fixup, *ptr; + u16 sample; + u16 fo = le16_to_cpu(rhdr->fix_off); + u16 fn = le16_to_cpu(rhdr->fix_num); + + if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || + fn * SECTOR_SIZE > bytes) { + return false; + } + + /* Get fixup pointer */ + fixup = Add2Ptr(rhdr, fo); + + if (*fixup >= 0x7FFF) + *fixup = 1; + else + *fixup += 1; + + sample = *fixup; + + ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); + + while (fn--) { + *++fixup = *ptr; + *ptr = sample; + ptr += SECTOR_SIZE / sizeof(short); + } + return true; +} + +/* + * ntfs_fix_post_read + * + * remove fixups after reading from disk + * Returns < 0 if error, 0 if ok, 1 if need to update fixups + */ +int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, + bool simple) +{ + int ret; + u16 *fixup, *ptr; + u16 sample, fo, fn; + + fo = le16_to_cpu(rhdr->fix_off); + fn = simple ? ((bytes >> SECTOR_SHIFT) + 1) + : le16_to_cpu(rhdr->fix_num); + + /* Check errors */ + if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || + fn * SECTOR_SIZE > bytes) { + return -EINVAL; /* native chkntfs returns ok! */ + } + + /* Get fixup pointer */ + fixup = Add2Ptr(rhdr, fo); + sample = *fixup; + ptr = Add2Ptr(rhdr, SECTOR_SIZE - sizeof(short)); + ret = 0; + + while (fn--) { + /* Test current word */ + if (*ptr != sample) { + /* Fixup does not match! Is it serious error? */ + ret = -E_NTFS_FIXUP; + } + + /* Replace fixup */ + *ptr = *++fixup; + ptr += SECTOR_SIZE / sizeof(short); + } + + return ret; +} + +/* + * ntfs_extend_init + * + * loads $Extend file + */ +int ntfs_extend_init(struct ntfs_sb_info *sbi) +{ + int err; + struct super_block *sb = sbi->sb; + struct inode *inode, *inode2; + struct MFT_REF ref; + + if (sbi->volume.major_ver < 3) { + ntfs_notice(sb, "Skip $Extend 'cause NTFS version"); + return 0; + } + + ref.low = cpu_to_le32(MFT_REC_EXTEND); + ref.high = 0; + ref.seq = cpu_to_le16(MFT_REC_EXTEND); + inode = ntfs_iget5(sb, &ref, &NAME_EXTEND); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $Extend."); + inode = NULL; + goto out; + } + + /* if ntfs_iget5 reads from disk it never returns bad inode */ + if (!S_ISDIR(inode->i_mode)) { + err = -EINVAL; + goto out; + } + + /* Try to find $ObjId */ + inode2 = dir_search_u(inode, &NAME_OBJID, NULL); + if (inode2 && !IS_ERR(inode2)) { + if (is_bad_inode(inode2)) { + iput(inode2); + } else { + sbi->objid.ni = ntfs_i(inode2); + sbi->objid_no = inode2->i_ino; + } + } + + /* Try to find $Quota */ + inode2 = dir_search_u(inode, &NAME_QUOTA, NULL); + if (inode2 && !IS_ERR(inode2)) { + sbi->quota_no = inode2->i_ino; + iput(inode2); + } + + /* Try to find $Reparse */ + inode2 = dir_search_u(inode, &NAME_REPARSE, NULL); + if (inode2 && !IS_ERR(inode2)) { + sbi->reparse.ni = ntfs_i(inode2); + sbi->reparse_no = inode2->i_ino; + } + + /* Try to find $UsnJrnl */ + inode2 = dir_search_u(inode, &NAME_USNJRNL, NULL); + if (inode2 && !IS_ERR(inode2)) { + sbi->usn_jrnl_no = inode2->i_ino; + iput(inode2); + } + + err = 0; +out: + iput(inode); + return err; +} + +int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi) +{ + int err = 0; + struct super_block *sb = sbi->sb; + bool initialized = false; + struct MFT_REF ref; + struct inode *inode; + + /* Check for 4GB */ + if (ni->vfs_inode.i_size >= 0x100000000ull) { + ntfs_err(sb, "\x24LogFile is too big"); + err = -EINVAL; + goto out; + } + + sbi->flags |= NTFS_FLAGS_LOG_REPLAYING; + + ref.low = cpu_to_le32(MFT_REC_MFT); + ref.high = 0; + ref.seq = cpu_to_le16(1); + + inode = ntfs_iget5(sb, &ref, NULL); + + if (IS_ERR(inode)) + inode = NULL; + + if (!inode) { + /* Try to use mft copy */ + u64 t64 = sbi->mft.lbo; + + sbi->mft.lbo = sbi->mft.lbo2; + inode = ntfs_iget5(sb, &ref, NULL); + sbi->mft.lbo = t64; + if (IS_ERR(inode)) + inode = NULL; + } + + if (!inode) { + err = -EINVAL; + ntfs_err(sb, "Failed to load $MFT."); + goto out; + } + + sbi->mft.ni = ntfs_i(inode); + + /* LogFile should not contains attribute list */ + err = ni_load_all_mi(sbi->mft.ni); + if (!err) + err = log_replay(ni, &initialized); + + iput(inode); + sbi->mft.ni = NULL; + + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); + + if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) { + err = 0; + goto out; + } + + if (sb_rdonly(sb) || !initialized) + goto out; + + /* fill LogFile by '-1' if it is initialized */ + err = ntfs_bio_fill_1(sbi, &ni->file.run); + +out: + sbi->flags &= ~NTFS_FLAGS_LOG_REPLAYING; + + return err; +} + +/* + * ntfs_query_def + * + * returns current ATTR_DEF_ENTRY for given attribute type + */ +const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi, + enum ATTR_TYPE type) +{ + int type_in = le32_to_cpu(type); + size_t min_idx = 0; + size_t max_idx = sbi->def_entries - 1; + + while (min_idx <= max_idx) { + size_t i = min_idx + ((max_idx - min_idx) >> 1); + const struct ATTR_DEF_ENTRY *entry = sbi->def_table + i; + int diff = le32_to_cpu(entry->type) - type_in; + + if (!diff) + return entry; + if (diff < 0) + min_idx = i + 1; + else if (i) + max_idx = i - 1; + else + return NULL; + } + return NULL; +} + +/* + * ntfs_look_for_free_space + * + * looks for a free space in bitmap + */ +int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, + CLST *new_lcn, CLST *new_len, + enum ALLOCATE_OPT opt) +{ + int err; + struct super_block *sb = sbi->sb; + size_t a_lcn, zlen, zeroes, zlcn, zlen2, ztrim, new_zlen; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + if (opt & ALLOCATE_MFT) { + CLST alen; + + zlen = wnd_zone_len(wnd); + + if (!zlen) { + err = ntfs_refresh_zone(sbi); + if (err) + goto out; + + zlen = wnd_zone_len(wnd); + + if (!zlen) { + ntfs_err(sbi->sb, + "no free space to extend mft"); + err = -ENOSPC; + goto out; + } + } + + lcn = wnd_zone_bit(wnd); + alen = zlen > len ? len : zlen; + + wnd_zone_set(wnd, lcn + alen, zlen - alen); + + err = wnd_set_used(wnd, lcn, alen); + if (err) + goto out; + + *new_lcn = lcn; + *new_len = alen; + goto ok; + } + + /* + * 'Cause cluster 0 is always used this value means that we should use + * cached value of 'next_free_lcn' to improve performance + */ + if (!lcn) + lcn = sbi->used.next_free_lcn; + + if (lcn >= wnd->nbits) + lcn = 0; + + *new_len = wnd_find(wnd, len, lcn, BITMAP_FIND_MARK_AS_USED, &a_lcn); + if (*new_len) { + *new_lcn = a_lcn; + goto ok; + } + + /* Try to use clusters from MftZone */ + zlen = wnd_zone_len(wnd); + zeroes = wnd_zeroes(wnd); + + /* Check too big request */ + if (len > zeroes + zlen) + goto no_space; + + if (zlen <= NTFS_MIN_MFT_ZONE) + goto no_space; + + /* How many clusters to cat from zone */ + zlcn = wnd_zone_bit(wnd); + zlen2 = zlen >> 1; + ztrim = len > zlen ? zlen : (len > zlen2 ? len : zlen2); + new_zlen = zlen - ztrim; + + if (new_zlen < NTFS_MIN_MFT_ZONE) { + new_zlen = NTFS_MIN_MFT_ZONE; + if (new_zlen > zlen) + new_zlen = zlen; + } + + wnd_zone_set(wnd, zlcn, new_zlen); + + /* allocate continues clusters */ + *new_len = + wnd_find(wnd, len, 0, + BITMAP_FIND_MARK_AS_USED | BITMAP_FIND_FULL, &a_lcn); + if (*new_len) { + *new_lcn = a_lcn; + goto ok; + } + +no_space: + up_write(&wnd->rw_lock); + + return -ENOSPC; + +ok: + err = 0; + + ntfs_unmap_meta(sb, *new_lcn, *new_len); + + if (opt & ALLOCATE_MFT) + goto out; + + /* Set hint for next requests */ + sbi->used.next_free_lcn = *new_lcn + *new_len; + +out: + up_write(&wnd->rw_lock); + return err; +} + +/* + * ntfs_extend_mft + * + * allocates additional MFT records + * sbi->mft.bitmap is locked for write + * + * NOTE: recursive: + * ntfs_look_free_mft -> + * ntfs_extend_mft -> + * attr_set_size -> + * ni_insert_nonresident -> + * ni_insert_attr -> + * ni_ins_attr_ext -> + * ntfs_look_free_mft -> + * ntfs_extend_mft + * To avoid recursive always allocate space for two new mft records + * see attrib.c: "at least two mft to avoid recursive loop" + */ +static int ntfs_extend_mft(struct ntfs_sb_info *sbi) +{ + int err; + struct ntfs_inode *ni = sbi->mft.ni; + size_t new_mft_total; + u64 new_mft_bytes, new_bitmap_bytes; + struct ATTRIB *attr; + struct wnd_bitmap *wnd = &sbi->mft.bitmap; + + new_mft_total = (wnd->nbits + MFT_INCREASE_CHUNK + 127) & (CLST)~127; + new_mft_bytes = (u64)new_mft_total << sbi->record_bits; + + /* Step 1: Resize $MFT::DATA */ + down_write(&ni->file.run_lock); + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, + new_mft_bytes, NULL, false, &attr); + + if (err) { + up_write(&ni->file.run_lock); + goto out; + } + + attr->nres.valid_size = attr->nres.data_size; + new_mft_total = le64_to_cpu(attr->nres.alloc_size) >> sbi->record_bits; + ni->mi.dirty = true; + + /* Step 2: Resize $MFT::BITMAP */ + new_bitmap_bytes = bitmap_size(new_mft_total); + + err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run, + new_bitmap_bytes, &new_bitmap_bytes, true, NULL); + + /* Refresh Mft Zone if necessary */ + down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS); + + ntfs_refresh_zone(sbi); + + up_write(&sbi->used.bitmap.rw_lock); + up_write(&ni->file.run_lock); + + if (err) + goto out; + + err = wnd_extend(wnd, new_mft_total); + + if (err) + goto out; + + ntfs_clear_mft_tail(sbi, sbi->mft.used, new_mft_total); + + err = _ni_write_inode(&ni->vfs_inode, 0); +out: + return err; +} + +/* + * ntfs_look_free_mft + * + * looks for a free MFT record + */ +int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, + struct ntfs_inode *ni, struct mft_inode **mi) +{ + int err = 0; + size_t zbit, zlen, from, to, fr; + size_t mft_total; + struct MFT_REF ref; + struct super_block *sb = sbi->sb; + struct wnd_bitmap *wnd = &sbi->mft.bitmap; + u32 ir; + + static_assert(sizeof(sbi->mft.reserved_bitmap) * 8 >= + MFT_REC_FREE - MFT_REC_RESERVED); + + if (!mft) + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + + zlen = wnd_zone_len(wnd); + + /* Always reserve space for MFT */ + if (zlen) { + if (mft) { + zbit = wnd_zone_bit(wnd); + *rno = zbit; + wnd_zone_set(wnd, zbit + 1, zlen - 1); + } + goto found; + } + + /* No MFT zone. find the nearest to '0' free MFT */ + if (!wnd_find(wnd, 1, MFT_REC_FREE, 0, &zbit)) { + /* Resize MFT */ + mft_total = wnd->nbits; + + err = ntfs_extend_mft(sbi); + if (!err) { + zbit = mft_total; + goto reserve_mft; + } + + if (!mft || MFT_REC_FREE == sbi->mft.next_reserved) + goto out; + + err = 0; + + /* + * Look for free record reserved area [11-16) == + * [MFT_REC_RESERVED, MFT_REC_FREE ) MFT bitmap always + * marks it as used + */ + if (!sbi->mft.reserved_bitmap) { + /* Once per session create internal bitmap for 5 bits */ + sbi->mft.reserved_bitmap = 0xFF; + + ref.high = 0; + for (ir = MFT_REC_RESERVED; ir < MFT_REC_FREE; ir++) { + struct inode *i; + struct ntfs_inode *ni; + struct MFT_REC *mrec; + + ref.low = cpu_to_le32(ir); + ref.seq = cpu_to_le16(ir); + + i = ntfs_iget5(sb, &ref, NULL); + if (IS_ERR(i)) { +next: + ntfs_notice( + sb, + "Invalid reserved record %x", + ref.low); + continue; + } + if (is_bad_inode(i)) { + iput(i); + goto next; + } + + ni = ntfs_i(i); + + mrec = ni->mi.mrec; + + if (!is_rec_base(mrec)) + goto next; + + if (mrec->hard_links) + goto next; + + if (!ni_std(ni)) + goto next; + + if (ni_find_attr(ni, NULL, NULL, ATTR_NAME, + NULL, 0, NULL, NULL)) + goto next; + + __clear_bit(ir - MFT_REC_RESERVED, + &sbi->mft.reserved_bitmap); + } + } + + /* Scan 5 bits for zero. Bit 0 == MFT_REC_RESERVED */ + zbit = find_next_zero_bit(&sbi->mft.reserved_bitmap, + MFT_REC_FREE, MFT_REC_RESERVED); + if (zbit >= MFT_REC_FREE) { + sbi->mft.next_reserved = MFT_REC_FREE; + goto out; + } + + zlen = 1; + sbi->mft.next_reserved = zbit; + } else { +reserve_mft: + zlen = zbit == MFT_REC_FREE ? (MFT_REC_USER - MFT_REC_FREE) : 4; + if (zbit + zlen > wnd->nbits) + zlen = wnd->nbits - zbit; + + while (zlen > 1 && !wnd_is_free(wnd, zbit, zlen)) + zlen -= 1; + + /* [zbit, zbit + zlen) will be used for Mft itself */ + from = sbi->mft.used; + if (from < zbit) + from = zbit; + to = zbit + zlen; + if (from < to) { + ntfs_clear_mft_tail(sbi, from, to); + sbi->mft.used = to; + } + } + + if (mft) { + *rno = zbit; + zbit += 1; + zlen -= 1; + } + + wnd_zone_set(wnd, zbit, zlen); + +found: + if (!mft) { + /* The request to get record for general purpose */ + if (sbi->mft.next_free < MFT_REC_USER) + sbi->mft.next_free = MFT_REC_USER; + + for (;;) { + if (sbi->mft.next_free >= sbi->mft.bitmap.nbits) { + } else if (!wnd_find(wnd, 1, MFT_REC_USER, 0, &fr)) { + sbi->mft.next_free = sbi->mft.bitmap.nbits; + } else { + *rno = fr; + sbi->mft.next_free = *rno + 1; + break; + } + + err = ntfs_extend_mft(sbi); + if (err) + goto out; + } + } + + if (ni && !ni_add_subrecord(ni, *rno, mi)) { + err = -ENOMEM; + goto out; + } + + /* We have found a record that are not reserved for next MFT */ + if (*rno >= MFT_REC_FREE) + wnd_set_used(wnd, *rno, 1); + else if (*rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) + __set_bit(*rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); + +out: + if (!mft) + up_write(&wnd->rw_lock); + + return err; +} + +/* + * ntfs_mark_rec_free + * + * marks record as free + */ +void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno) +{ + struct wnd_bitmap *wnd = &sbi->mft.bitmap; + + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT); + if (rno >= wnd->nbits) + goto out; + + if (rno >= MFT_REC_FREE) { + if (!wnd_is_used(wnd, rno, 1)) + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + else + wnd_set_free(wnd, rno, 1); + } else if (rno >= MFT_REC_RESERVED && sbi->mft.reserved_bitmap_inited) { + __clear_bit(rno - MFT_REC_RESERVED, &sbi->mft.reserved_bitmap); + } + + if (rno < wnd_zone_bit(wnd)) + wnd_zone_set(wnd, rno, 1); + else if (rno < sbi->mft.next_free && rno >= MFT_REC_USER) + sbi->mft.next_free = rno; + +out: + up_write(&wnd->rw_lock); +} + +/* + * ntfs_clear_mft_tail + * + * formats empty records [from, to) + * sbi->mft.bitmap is locked for write + */ +int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to) +{ + int err; + u32 rs; + u64 vbo; + struct runs_tree *run; + struct ntfs_inode *ni; + + if (from >= to) + return 0; + + rs = sbi->record_size; + ni = sbi->mft.ni; + run = &ni->file.run; + + down_read(&ni->file.run_lock); + vbo = (u64)from * rs; + for (; from < to; from++, vbo += rs) { + struct ntfs_buffers nb; + + err = ntfs_get_bh(sbi, run, vbo, rs, &nb); + if (err) + goto out; + + err = ntfs_write_bh(sbi, &sbi->new_rec->rhdr, &nb, 0); + nb_put(&nb); + if (err) + goto out; + } + +out: + sbi->mft.used = from; + up_read(&ni->file.run_lock); + return err; +} + +/* + * ntfs_refresh_zone + * + * refreshes Mft zone + * sbi->used.bitmap is locked for rw + * sbi->mft.bitmap is locked for write + * sbi->mft.ni->file.run_lock for write + */ +int ntfs_refresh_zone(struct ntfs_sb_info *sbi) +{ + CLST zone_limit, zone_max, lcn, vcn, len; + size_t lcn_s, zlen; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + struct ntfs_inode *ni = sbi->mft.ni; + + /* Do not change anything unless we have non empty Mft zone */ + if (wnd_zone_len(wnd)) + return 0; + + /* + * Compute the mft zone at two steps + * It would be nice if we are able to allocate + * 1/8 of total clusters for MFT but not more then 512 MB + */ + zone_limit = (512 * 1024 * 1024) >> sbi->cluster_bits; + zone_max = wnd->nbits >> 3; + if (zone_max > zone_limit) + zone_max = zone_limit; + + vcn = bytes_to_cluster(sbi, + (u64)sbi->mft.bitmap.nbits << sbi->record_bits); + + if (!run_lookup_entry(&ni->file.run, vcn - 1, &lcn, &len, NULL)) + lcn = SPARSE_LCN; + + /* We should always find Last Lcn for MFT */ + if (lcn == SPARSE_LCN) + return -EINVAL; + + lcn_s = lcn + 1; + + /* Try to allocate clusters after last MFT run */ + zlen = wnd_find(wnd, zone_max, lcn_s, 0, &lcn_s); + if (!zlen) { + ntfs_notice(sbi->sb, "MftZone: unavailable"); + return 0; + } + + /* Truncate too large zone */ + wnd_zone_set(wnd, lcn_s, zlen); + + return 0; +} + +/* + * ntfs_update_mftmirr + * + * updates $MFTMirr data + */ +int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait) +{ + int err; + struct super_block *sb = sbi->sb; + u32 blocksize = sb->s_blocksize; + sector_t block1, block2; + u32 bytes; + + if (!(sbi->flags & NTFS_FLAGS_MFTMIRR)) + return 0; + + err = 0; + bytes = sbi->mft.recs_mirr << sbi->record_bits; + block1 = sbi->mft.lbo >> sb->s_blocksize_bits; + block2 = sbi->mft.lbo2 >> sb->s_blocksize_bits; + + for (; bytes >= blocksize; bytes -= blocksize) { + struct buffer_head *bh1, *bh2; + + bh1 = sb_bread(sb, block1++); + if (!bh1) { + err = -EIO; + goto out; + } + + bh2 = sb_getblk(sb, block2++); + if (!bh2) { + put_bh(bh1); + err = -EIO; + goto out; + } + + if (buffer_locked(bh2)) + __wait_on_buffer(bh2); + + lock_buffer(bh2); + memcpy(bh2->b_data, bh1->b_data, blocksize); + set_buffer_uptodate(bh2); + mark_buffer_dirty(bh2); + unlock_buffer(bh2); + + put_bh(bh1); + bh1 = NULL; + + if (wait) + err = sync_dirty_buffer(bh2); + + put_bh(bh2); + if (err) + goto out; + } + + sbi->flags &= ~NTFS_FLAGS_MFTMIRR; + +out: + return err; +} + +/* + * ntfs_set_state + * + * mount: ntfs_set_state(NTFS_DIRTY_DIRTY) + * umount: ntfs_set_state(NTFS_DIRTY_CLEAR) + * ntfs error: ntfs_set_state(NTFS_DIRTY_ERROR) + */ +int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty) +{ + int err; + struct ATTRIB *attr; + struct VOLUME_INFO *info; + struct mft_inode *mi; + struct ntfs_inode *ni; + + /* + * do not change state if fs was real_dirty + * do not change state if fs already dirty(clear) + * do not change any thing if mounted read only + */ + if (sbi->volume.real_dirty || sb_rdonly(sbi->sb)) + return 0; + + /* Check cached value */ + if ((dirty == NTFS_DIRTY_CLEAR ? 0 : VOLUME_FLAG_DIRTY) == + (sbi->volume.flags & VOLUME_FLAG_DIRTY)) + return 0; + + ni = sbi->volume.ni; + if (!ni) + return -EINVAL; + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_DIRTY); + + attr = ni_find_attr(ni, NULL, NULL, ATTR_VOL_INFO, NULL, 0, NULL, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO); + if (!info) { + err = -EINVAL; + goto out; + } + + switch (dirty) { + case NTFS_DIRTY_ERROR: + ntfs_notice(sbi->sb, "Mark volume as dirty due to NTFS errors"); + sbi->volume.real_dirty = true; + fallthrough; + case NTFS_DIRTY_DIRTY: + info->flags |= VOLUME_FLAG_DIRTY; + break; + case NTFS_DIRTY_CLEAR: + info->flags &= ~VOLUME_FLAG_DIRTY; + break; + } + /* cache current volume flags*/ + sbi->volume.flags = info->flags; + mi->dirty = true; + err = 0; + +out: + ni_unlock(ni); + if (err) + return err; + + mark_inode_dirty(&ni->vfs_inode); + /*verify(!ntfs_update_mftmirr()); */ + err = sync_inode_metadata(&ni->vfs_inode, 1); + + return err; +} + +/* + * security_hash + * + * calculates a hash of security descriptor + */ +static inline __le32 security_hash(const void *sd, size_t bytes) +{ + u32 hash = 0; + const __le32 *ptr = sd; + + bytes >>= 2; + while (bytes--) + hash = ((hash >> 0x1D) | (hash << 3)) + le32_to_cpu(*ptr++); + return cpu_to_le32(hash); +} + +int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer) +{ + struct block_device *bdev = sb->s_bdev; + u32 blocksize = sb->s_blocksize; + u64 block = lbo >> sb->s_blocksize_bits; + u32 off = lbo & (blocksize - 1); + u32 op = blocksize - off; + + for (; bytes; block += 1, off = 0, op = blocksize) { + struct buffer_head *bh = __bread(bdev, block, blocksize); + + if (!bh) + return -EIO; + + if (op > bytes) + op = bytes; + + memcpy(buffer, bh->b_data + off, op); + + put_bh(bh); + + bytes -= op; + buffer = Add2Ptr(buffer, op); + } + + return 0; +} + +int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, + const void *buf, int wait) +{ + u32 blocksize = sb->s_blocksize; + struct block_device *bdev = sb->s_bdev; + sector_t block = lbo >> sb->s_blocksize_bits; + u32 off = lbo & (blocksize - 1); + u32 op = blocksize - off; + struct buffer_head *bh; + + if (!wait && (sb->s_flags & SB_SYNCHRONOUS)) + wait = 1; + + for (; bytes; block += 1, off = 0, op = blocksize) { + if (op > bytes) + op = bytes; + + if (op < blocksize) { + bh = __bread(bdev, block, blocksize); + if (!bh) { + ntfs_err(sb, "failed to read block %llx", + (u64)block); + return -EIO; + } + } else { + bh = __getblk(bdev, block, blocksize); + if (!bh) + return -ENOMEM; + } + + if (buffer_locked(bh)) + __wait_on_buffer(bh); + + lock_buffer(bh); + if (buf) { + memcpy(bh->b_data + off, buf, op); + buf = Add2Ptr(buf, op); + } else { + memset(bh->b_data + off, -1, op); + } + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + + if (wait) { + int err = sync_dirty_buffer(bh); + + if (err) { + ntfs_err( + sb, + "failed to sync buffer at block %llx, error %d", + (u64)block, err); + put_bh(bh); + return err; + } + } + + put_bh(bh); + + bytes -= op; + } + return 0; +} + +int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, const void *buf, size_t bytes) +{ + struct super_block *sb = sbi->sb; + u8 cluster_bits = sbi->cluster_bits; + u32 off = vbo & sbi->cluster_mask; + CLST lcn, clen, vcn = vbo >> cluster_bits, vcn_next; + u64 lbo, len; + size_t idx; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) + return -ENOENT; + + if (lcn == SPARSE_LCN) + return -EINVAL; + + lbo = ((u64)lcn << cluster_bits) + off; + len = ((u64)clen << cluster_bits) - off; + + for (;;) { + u32 op = len < bytes ? len : bytes; + int err = ntfs_sb_write(sb, lbo, op, buf, 0); + + if (err) + return err; + + bytes -= op; + if (!bytes) + break; + + vcn_next = vcn + clen; + if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || + vcn != vcn_next) + return -ENOENT; + + if (lcn == SPARSE_LCN) + return -EINVAL; + + if (buf) + buf = Add2Ptr(buf, op); + + lbo = ((u64)lcn << cluster_bits); + len = ((u64)clen << cluster_bits); + } + + return 0; +} + +struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo) +{ + struct super_block *sb = sbi->sb; + u8 cluster_bits = sbi->cluster_bits; + CLST lcn; + u64 lbo; + + if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, NULL, NULL)) + return ERR_PTR(-ENOENT); + + lbo = ((u64)lcn << cluster_bits) + (vbo & sbi->cluster_mask); + + return ntfs_bread(sb, lbo >> sb->s_blocksize_bits); +} + +int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb) +{ + int err; + struct super_block *sb = sbi->sb; + u32 blocksize = sb->s_blocksize; + u8 cluster_bits = sbi->cluster_bits; + u32 off = vbo & sbi->cluster_mask; + u32 nbh = 0; + CLST vcn_next, vcn = vbo >> cluster_bits; + CLST lcn, clen; + u64 lbo, len; + size_t idx; + struct buffer_head *bh; + + if (!run) { + /* first reading of $Volume + $MFTMirr + LogFile goes here*/ + if (vbo > MFT_REC_VOL * sbi->record_size) { + err = -ENOENT; + goto out; + } + + /* use absolute boot's 'MFTCluster' to read record */ + lbo = vbo + sbi->mft.lbo; + len = sbi->record_size; + } else if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { + err = -ENOENT; + goto out; + } else { + if (lcn == SPARSE_LCN) { + err = -EINVAL; + goto out; + } + + lbo = ((u64)lcn << cluster_bits) + off; + len = ((u64)clen << cluster_bits) - off; + } + + off = lbo & (blocksize - 1); + if (nb) { + nb->off = off; + nb->bytes = bytes; + } + + for (;;) { + u32 len32 = len >= bytes ? bytes : len; + sector_t block = lbo >> sb->s_blocksize_bits; + + do { + u32 op = blocksize - off; + + if (op > len32) + op = len32; + + bh = ntfs_bread(sb, block); + if (!bh) { + err = -EIO; + goto out; + } + + if (buf) { + memcpy(buf, bh->b_data + off, op); + buf = Add2Ptr(buf, op); + } + + if (!nb) { + put_bh(bh); + } else if (nbh >= ARRAY_SIZE(nb->bh)) { + err = -EINVAL; + goto out; + } else { + nb->bh[nbh++] = bh; + nb->nbufs = nbh; + } + + bytes -= op; + if (!bytes) + return 0; + len32 -= op; + block += 1; + off = 0; + + } while (len32); + + vcn_next = vcn + clen; + if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || + vcn != vcn_next) { + err = -ENOENT; + goto out; + } + + if (lcn == SPARSE_LCN) { + err = -EINVAL; + goto out; + } + + lbo = ((u64)lcn << cluster_bits); + len = ((u64)clen << cluster_bits); + } + +out: + if (!nbh) + return err; + + while (nbh) { + put_bh(nb->bh[--nbh]); + nb->bh[nbh] = NULL; + } + + nb->nbufs = 0; + return err; +} + +/* Returns < 0 if error, 0 if ok, '-E_NTFS_FIXUP' if need to update fixups */ +int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, + struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb) +{ + int err = ntfs_read_run_nb(sbi, run, vbo, rhdr, bytes, nb); + + if (err) + return err; + return ntfs_fix_post_read(rhdr, nb->bytes, true); +} + +int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, + u32 bytes, struct ntfs_buffers *nb) +{ + int err = 0; + struct super_block *sb = sbi->sb; + u32 blocksize = sb->s_blocksize; + u8 cluster_bits = sbi->cluster_bits; + CLST vcn_next, vcn = vbo >> cluster_bits; + u32 off; + u32 nbh = 0; + CLST lcn, clen; + u64 lbo, len; + size_t idx; + + nb->bytes = bytes; + + if (!run_lookup_entry(run, vcn, &lcn, &clen, &idx)) { + err = -ENOENT; + goto out; + } + + off = vbo & sbi->cluster_mask; + lbo = ((u64)lcn << cluster_bits) + off; + len = ((u64)clen << cluster_bits) - off; + + nb->off = off = lbo & (blocksize - 1); + + for (;;) { + u32 len32 = len < bytes ? len : bytes; + sector_t block = lbo >> sb->s_blocksize_bits; + + do { + u32 op; + struct buffer_head *bh; + + if (nbh >= ARRAY_SIZE(nb->bh)) { + err = -EINVAL; + goto out; + } + + op = blocksize - off; + if (op > len32) + op = len32; + + if (op == blocksize) { + bh = sb_getblk(sb, block); + if (!bh) { + err = -ENOMEM; + goto out; + } + if (buffer_locked(bh)) + __wait_on_buffer(bh); + set_buffer_uptodate(bh); + } else { + bh = ntfs_bread(sb, block); + if (!bh) { + err = -EIO; + goto out; + } + } + + nb->bh[nbh++] = bh; + bytes -= op; + if (!bytes) { + nb->nbufs = nbh; + return 0; + } + + block += 1; + len32 -= op; + off = 0; + } while (len32); + + vcn_next = vcn + clen; + if (!run_get_entry(run, ++idx, &vcn, &lcn, &clen) || + vcn != vcn_next) { + err = -ENOENT; + goto out; + } + + lbo = ((u64)lcn << cluster_bits); + len = ((u64)clen << cluster_bits); + } + +out: + while (nbh) { + put_bh(nb->bh[--nbh]); + nb->bh[nbh] = NULL; + } + + nb->nbufs = 0; + + return err; +} + +int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, + struct ntfs_buffers *nb, int sync) +{ + int err = 0; + struct super_block *sb = sbi->sb; + u32 block_size = sb->s_blocksize; + u32 bytes = nb->bytes; + u32 off = nb->off; + u16 fo = le16_to_cpu(rhdr->fix_off); + u16 fn = le16_to_cpu(rhdr->fix_num); + u32 idx; + __le16 *fixup; + __le16 sample; + + if ((fo & 1) || fo + fn * sizeof(short) > SECTOR_SIZE || !fn-- || + fn * SECTOR_SIZE > bytes) { + return -EINVAL; + } + + for (idx = 0; bytes && idx < nb->nbufs; idx += 1, off = 0) { + u32 op = block_size - off; + char *bh_data; + struct buffer_head *bh = nb->bh[idx]; + __le16 *ptr, *end_data; + + if (op > bytes) + op = bytes; + + if (buffer_locked(bh)) + __wait_on_buffer(bh); + + lock_buffer(nb->bh[idx]); + + bh_data = bh->b_data + off; + end_data = Add2Ptr(bh_data, op); + memcpy(bh_data, rhdr, op); + + if (!idx) { + u16 t16; + + fixup = Add2Ptr(bh_data, fo); + sample = *fixup; + t16 = le16_to_cpu(sample); + if (t16 >= 0x7FFF) { + sample = *fixup = cpu_to_le16(1); + } else { + sample = cpu_to_le16(t16 + 1); + *fixup = sample; + } + + *(__le16 *)Add2Ptr(rhdr, fo) = sample; + } + + ptr = Add2Ptr(bh_data, SECTOR_SIZE - sizeof(short)); + + do { + *++fixup = *ptr; + *ptr = sample; + ptr += SECTOR_SIZE / sizeof(short); + } while (ptr < end_data); + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + unlock_buffer(bh); + + if (sync) { + int err2 = sync_dirty_buffer(bh); + + if (!err && err2) + err = err2; + } + + bytes -= op; + rhdr = Add2Ptr(rhdr, op); + } + + return err; +} + +static inline struct bio *ntfs_alloc_bio(u32 nr_vecs) +{ + struct bio *bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs); + + if (!bio && (current->flags & PF_MEMALLOC)) { + while (!bio && (nr_vecs /= 2)) + bio = bio_alloc(GFP_NOFS | __GFP_HIGH, nr_vecs); + } + return bio; +} + +/* read/write pages from/to disk*/ +int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, + struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, + u32 op) +{ + int err = 0; + struct bio *new, *bio = NULL; + struct super_block *sb = sbi->sb; + struct block_device *bdev = sb->s_bdev; + struct page *page; + u8 cluster_bits = sbi->cluster_bits; + CLST lcn, clen, vcn, vcn_next; + u32 add, off, page_idx; + u64 lbo, len; + size_t run_idx; + struct blk_plug plug; + + if (!bytes) + return 0; + + blk_start_plug(&plug); + + /* align vbo and bytes to be 512 bytes aligned */ + lbo = (vbo + bytes + 511) & ~511ull; + vbo = vbo & ~511ull; + bytes = lbo - vbo; + + vcn = vbo >> cluster_bits; + if (!run_lookup_entry(run, vcn, &lcn, &clen, &run_idx)) { + err = -ENOENT; + goto out; + } + off = vbo & sbi->cluster_mask; + page_idx = 0; + page = pages[0]; + + for (;;) { + lbo = ((u64)lcn << cluster_bits) + off; + len = ((u64)clen << cluster_bits) - off; +new_bio: + new = ntfs_alloc_bio(nr_pages - page_idx); + if (!new) { + err = -ENOMEM; + goto out; + } + if (bio) { + bio_chain(bio, new); + submit_bio(bio); + } + bio = new; + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = lbo >> 9; + bio->bi_opf = op; + + while (len) { + off = vbo & (PAGE_SIZE - 1); + add = off + len > PAGE_SIZE ? (PAGE_SIZE - off) : len; + + if (bio_add_page(bio, page, add, off) < add) + goto new_bio; + + if (bytes <= add) + goto out; + bytes -= add; + vbo += add; + + if (add + off == PAGE_SIZE) { + page_idx += 1; + if (WARN_ON(page_idx >= nr_pages)) { + err = -EINVAL; + goto out; + } + page = pages[page_idx]; + } + + if (len <= add) + break; + len -= add; + lbo += add; + } + + vcn_next = vcn + clen; + if (!run_get_entry(run, ++run_idx, &vcn, &lcn, &clen) || + vcn != vcn_next) { + err = -ENOENT; + goto out; + } + off = 0; + } +out: + if (bio) { + if (!err) + err = submit_bio_wait(bio); + bio_put(bio); + } + blk_finish_plug(&plug); + + return err; +} + +/* + * Helper for ntfs_loadlog_and_replay + * fill on-disk logfile range by (-1) + * this means empty logfile + */ +int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run) +{ + int err = 0; + struct super_block *sb = sbi->sb; + struct block_device *bdev = sb->s_bdev; + u8 cluster_bits = sbi->cluster_bits; + struct bio *new, *bio = NULL; + CLST lcn, clen; + u64 lbo, len; + size_t run_idx; + struct page *fill; + void *kaddr; + struct blk_plug plug; + + fill = alloc_page(GFP_KERNEL); + if (!fill) + return -ENOMEM; + + kaddr = kmap_atomic(fill); + memset(kaddr, -1, PAGE_SIZE); + kunmap_atomic(kaddr); + flush_dcache_page(fill); + lock_page(fill); + + if (!run_lookup_entry(run, 0, &lcn, &clen, &run_idx)) { + err = -ENOENT; + goto out; + } + + /* + * TODO: try blkdev_issue_write_same + */ + blk_start_plug(&plug); + do { + lbo = (u64)lcn << cluster_bits; + len = (u64)clen << cluster_bits; +new_bio: + new = ntfs_alloc_bio(BIO_MAX_PAGES); + if (!new) { + err = -ENOMEM; + break; + } + if (bio) { + bio_chain(bio, new); + submit_bio(bio); + } + bio = new; + bio_set_dev(bio, bdev); + bio->bi_opf = REQ_OP_WRITE; + bio->bi_iter.bi_sector = lbo >> 9; + + for (;;) { + u32 add = len > PAGE_SIZE ? PAGE_SIZE : len; + + if (bio_add_page(bio, fill, add, 0) < add) + goto new_bio; + + lbo += add; + if (len <= add) + break; + len -= add; + } + } while (run_get_entry(run, ++run_idx, NULL, &lcn, &clen)); + + if (bio) { + if (!err) + err = submit_bio_wait(bio); + bio_put(bio); + } + blk_finish_plug(&plug); +out: + unlock_page(fill); + put_page(fill); + + return err; +} + +int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, u64 *lbo, u64 *bytes) +{ + u32 off; + CLST lcn, len; + u8 cluster_bits = sbi->cluster_bits; + + if (!run_lookup_entry(run, vbo >> cluster_bits, &lcn, &len, NULL)) + return -ENOENT; + + off = vbo & sbi->cluster_mask; + *lbo = lcn == SPARSE_LCN ? -1 : (((u64)lcn << cluster_bits) + off); + *bytes = ((u64)len << cluster_bits) - off; + + return 0; +} + +struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST rno, bool dir) +{ + int err = 0; + struct super_block *sb = sbi->sb; + struct inode *inode = new_inode(sb); + struct ntfs_inode *ni; + + if (!inode) + return ERR_PTR(-ENOMEM); + + ni = ntfs_i(inode); + + err = mi_format_new(&ni->mi, sbi, rno, dir ? RECORD_FLAG_DIR : 0, + false); + if (err) + goto out; + + inode->i_ino = rno; + if (insert_inode_locked(inode) < 0) { + err = -EIO; + goto out; + } + +out: + if (err) { + iput(inode); + ni = ERR_PTR(err); + } + return ni; +} + +/* + * O:BAG:BAD:(A;OICI;FA;;;WD) + * owner S-1-5-32-544 (Administrators) + * group S-1-5-32-544 (Administrators) + * ACE: allow S-1-1-0 (Everyone) with FILE_ALL_ACCESS + */ +const u8 s_default_security[] __aligned(8) = { + 0x01, 0x00, 0x04, 0x80, 0x30, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x02, 0x00, 0x1C, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x03, 0x14, 0x00, 0xFF, 0x01, 0x1F, 0x00, + 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x20, 0x00, 0x00, 0x00, + 0x20, 0x02, 0x00, 0x00, 0x01, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, + 0x20, 0x00, 0x00, 0x00, 0x20, 0x02, 0x00, 0x00, +}; + +static_assert(sizeof(s_default_security) == 0x50); + +static inline u32 sid_length(const struct SID *sid) +{ + return offsetof(struct SID, SubAuthority[0]) + + (sid->SubAuthorityCount * sizeof(u32)); +} + +/* + * Thanks Mark Harmstone for idea + */ +static bool is_acl_valid(const struct ACL *acl, u32 len) +{ + const struct ACE_HEADER *ace; + u32 i; + u16 ace_count, ace_size; + + if (acl->AclRevision != ACL_REVISION && + acl->AclRevision != ACL_REVISION_DS) { + /* + * This value should be ACL_REVISION, unless the ACL contains an + * object-specific ACE, in which case this value must be ACL_REVISION_DS. + * All ACEs in an ACL must be at the same revision level. + */ + return false; + } + + if (acl->Sbz1) + return false; + + if (le16_to_cpu(acl->AclSize) > len) + return false; + + if (acl->Sbz2) + return false; + + len -= sizeof(struct ACL); + ace = (struct ACE_HEADER *)&acl[1]; + ace_count = le16_to_cpu(acl->AceCount); + + for (i = 0; i < ace_count; i++) { + if (len < sizeof(struct ACE_HEADER)) + return false; + + ace_size = le16_to_cpu(ace->AceSize); + if (len < ace_size) + return false; + + len -= ace_size; + ace = Add2Ptr(ace, ace_size); + } + + return true; +} + +bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len) +{ + u32 sd_owner, sd_group, sd_sacl, sd_dacl; + + if (len < sizeof(struct SECURITY_DESCRIPTOR_RELATIVE)) + return false; + + if (sd->Revision != 1) + return false; + + if (sd->Sbz1) + return false; + + if (!(sd->Control & SE_SELF_RELATIVE)) + return false; + + sd_owner = le32_to_cpu(sd->Owner); + if (sd_owner) { + const struct SID *owner = Add2Ptr(sd, sd_owner); + + if (sd_owner + offsetof(struct SID, SubAuthority) > len) + return false; + + if (owner->Revision != 1) + return false; + + if (sd_owner + sid_length(owner) > len) + return false; + } + + sd_group = le32_to_cpu(sd->Group); + if (sd_group) { + const struct SID *group = Add2Ptr(sd, sd_group); + + if (sd_group + offsetof(struct SID, SubAuthority) > len) + return false; + + if (group->Revision != 1) + return false; + + if (sd_group + sid_length(group) > len) + return false; + } + + sd_sacl = le32_to_cpu(sd->Sacl); + if (sd_sacl) { + const struct ACL *sacl = Add2Ptr(sd, sd_sacl); + + if (sd_sacl + sizeof(struct ACL) > len) + return false; + + if (!is_acl_valid(sacl, len - sd_sacl)) + return false; + } + + sd_dacl = le32_to_cpu(sd->Dacl); + if (sd_dacl) { + const struct ACL *dacl = Add2Ptr(sd, sd_dacl); + + if (sd_dacl + sizeof(struct ACL) > len) + return false; + + if (!is_acl_valid(dacl, len - sd_dacl)) + return false; + } + + return true; +} + +/* + * ntfs_security_init + * + * loads and parse $Secure + */ +int ntfs_security_init(struct ntfs_sb_info *sbi) +{ + int err; + struct super_block *sb = sbi->sb; + struct inode *inode; + struct ntfs_inode *ni; + struct MFT_REF ref; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + u64 sds_size; + size_t cnt, off; + struct NTFS_DE *ne; + struct NTFS_DE_SII *sii_e; + struct ntfs_fnd *fnd_sii = NULL; + const struct INDEX_ROOT *root_sii; + const struct INDEX_ROOT *root_sdh; + struct ntfs_index *indx_sdh = &sbi->security.index_sdh; + struct ntfs_index *indx_sii = &sbi->security.index_sii; + + ref.low = cpu_to_le32(MFT_REC_SECURE); + ref.high = 0; + ref.seq = cpu_to_le16(MFT_REC_SECURE); + + inode = ntfs_iget5(sb, &ref, &NAME_SECURE); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $Secure."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + le = NULL; + + attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SDH_NAME, + ARRAY_SIZE(SDH_NAME), NULL, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + root_sdh = resident_data(attr); + if (root_sdh->type != ATTR_ZERO || + root_sdh->rule != NTFS_COLLATION_TYPE_SECURITY_HASH) { + err = -EINVAL; + goto out; + } + + err = indx_init(indx_sdh, sbi, attr, INDEX_MUTEX_SDH); + if (err) + goto out; + + attr = ni_find_attr(ni, attr, &le, ATTR_ROOT, SII_NAME, + ARRAY_SIZE(SII_NAME), NULL, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + root_sii = resident_data(attr); + if (root_sii->type != ATTR_ZERO || + root_sii->rule != NTFS_COLLATION_TYPE_UINT) { + err = -EINVAL; + goto out; + } + + err = indx_init(indx_sii, sbi, attr, INDEX_MUTEX_SII); + if (err) + goto out; + + fnd_sii = fnd_get(); + if (!fnd_sii) { + err = -ENOMEM; + goto out; + } + + sds_size = inode->i_size; + + /* Find the last valid Id */ + sbi->security.next_id = SECURITY_ID_FIRST; + /* Always write new security at the end of bucket */ + sbi->security.next_off = + Quad2Align(sds_size - SecurityDescriptorsBlockSize); + + cnt = 0; + off = 0; + ne = NULL; + + for (;;) { + u32 next_id; + + err = indx_find_raw(indx_sii, ni, root_sii, &ne, &off, fnd_sii); + if (err || !ne) + break; + + sii_e = (struct NTFS_DE_SII *)ne; + if (le16_to_cpu(ne->view.data_size) < SIZEOF_SECURITY_HDR) + continue; + + next_id = le32_to_cpu(sii_e->sec_id) + 1; + if (next_id >= sbi->security.next_id) + sbi->security.next_id = next_id; + + cnt += 1; + } + + sbi->security.ni = ni; + inode = NULL; +out: + iput(inode); + fnd_put(fnd_sii); + + return err; +} + +/* + * ntfs_get_security_by_id + * + * reads security descriptor by id + */ +int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, + struct SECURITY_DESCRIPTOR_RELATIVE **sd, + size_t *size) +{ + int err; + int diff; + struct ntfs_inode *ni = sbi->security.ni; + struct ntfs_index *indx = &sbi->security.index_sii; + void *p = NULL; + struct NTFS_DE_SII *sii_e; + struct ntfs_fnd *fnd_sii; + struct SECURITY_HDR d_security; + const struct INDEX_ROOT *root_sii; + u32 t32; + + *sd = NULL; + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); + + fnd_sii = fnd_get(); + if (!fnd_sii) { + err = -ENOMEM; + goto out; + } + + root_sii = indx_get_root(indx, ni, NULL, NULL); + if (!root_sii) { + err = -EINVAL; + goto out; + } + + /* Try to find this SECURITY descriptor in SII indexes */ + err = indx_find(indx, ni, root_sii, &security_id, sizeof(security_id), + NULL, &diff, (struct NTFS_DE **)&sii_e, fnd_sii); + if (err) + goto out; + + if (diff) + goto out; + + t32 = le32_to_cpu(sii_e->sec_hdr.size); + if (t32 < SIZEOF_SECURITY_HDR) { + err = -EINVAL; + goto out; + } + + if (t32 > SIZEOF_SECURITY_HDR + 0x10000) { + /* + * looks like too big security. 0x10000 - is arbitrary big number + */ + err = -EFBIG; + goto out; + } + + *size = t32 - SIZEOF_SECURITY_HDR; + + p = ntfs_malloc(*size); + if (!p) { + err = -ENOMEM; + goto out; + } + + err = ntfs_read_run_nb(sbi, &ni->file.run, + le64_to_cpu(sii_e->sec_hdr.off), &d_security, + sizeof(d_security), NULL); + if (err) + goto out; + + if (memcmp(&d_security, &sii_e->sec_hdr, SIZEOF_SECURITY_HDR)) { + err = -EINVAL; + goto out; + } + + err = ntfs_read_run_nb(sbi, &ni->file.run, + le64_to_cpu(sii_e->sec_hdr.off) + + SIZEOF_SECURITY_HDR, + p, *size, NULL); + if (err) + goto out; + + *sd = p; + p = NULL; + +out: + ntfs_free(p); + fnd_put(fnd_sii); + ni_unlock(ni); + + return err; +} + +/* + * ntfs_insert_security + * + * inserts security descriptor into $Secure::SDS + * + * SECURITY Descriptor Stream data is organized into chunks of 256K bytes + * and it contains a mirror copy of each security descriptor. When writing + * to a security descriptor at location X, another copy will be written at + * location (X+256K). + * When writing a security descriptor that will cross the 256K boundary, + * the pointer will be advanced by 256K to skip + * over the mirror portion. + */ +int ntfs_insert_security(struct ntfs_sb_info *sbi, + const struct SECURITY_DESCRIPTOR_RELATIVE *sd, + u32 size_sd, __le32 *security_id, bool *inserted) +{ + int err, diff; + struct ntfs_inode *ni = sbi->security.ni; + struct ntfs_index *indx_sdh = &sbi->security.index_sdh; + struct ntfs_index *indx_sii = &sbi->security.index_sii; + struct NTFS_DE_SDH *e; + struct NTFS_DE_SDH sdh_e; + struct NTFS_DE_SII sii_e; + struct SECURITY_HDR *d_security; + u32 new_sec_size = size_sd + SIZEOF_SECURITY_HDR; + u32 aligned_sec_size = Quad2Align(new_sec_size); + struct SECURITY_KEY hash_key; + struct ntfs_fnd *fnd_sdh = NULL; + const struct INDEX_ROOT *root_sdh; + const struct INDEX_ROOT *root_sii; + u64 mirr_off, new_sds_size; + u32 next, left; + + static_assert((1 << Log2OfSecurityDescriptorsBlockSize) == + SecurityDescriptorsBlockSize); + + hash_key.hash = security_hash(sd, size_sd); + hash_key.sec_id = SECURITY_ID_INVALID; + + if (inserted) + *inserted = false; + *security_id = SECURITY_ID_INVALID; + + /* Allocate a temporal buffer*/ + d_security = ntfs_zalloc(aligned_sec_size); + if (!d_security) + return -ENOMEM; + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_SECURITY); + + fnd_sdh = fnd_get(); + if (!fnd_sdh) { + err = -ENOMEM; + goto out; + } + + root_sdh = indx_get_root(indx_sdh, ni, NULL, NULL); + if (!root_sdh) { + err = -EINVAL; + goto out; + } + + root_sii = indx_get_root(indx_sii, ni, NULL, NULL); + if (!root_sii) { + err = -EINVAL; + goto out; + } + + /* + * Check if such security already exists + * use "SDH" and hash -> to get the offset in "SDS" + */ + err = indx_find(indx_sdh, ni, root_sdh, &hash_key, sizeof(hash_key), + &d_security->key.sec_id, &diff, (struct NTFS_DE **)&e, + fnd_sdh); + if (err) + goto out; + + while (e) { + if (le32_to_cpu(e->sec_hdr.size) == new_sec_size) { + err = ntfs_read_run_nb(sbi, &ni->file.run, + le64_to_cpu(e->sec_hdr.off), + d_security, new_sec_size, NULL); + if (err) + goto out; + + if (le32_to_cpu(d_security->size) == new_sec_size && + d_security->key.hash == hash_key.hash && + !memcmp(d_security + 1, sd, size_sd)) { + *security_id = d_security->key.sec_id; + /*such security already exists*/ + err = 0; + goto out; + } + } + + err = indx_find_sort(indx_sdh, ni, root_sdh, + (struct NTFS_DE **)&e, fnd_sdh); + if (err) + goto out; + + if (!e || e->key.hash != hash_key.hash) + break; + } + + /* Zero unused space */ + next = sbi->security.next_off & (SecurityDescriptorsBlockSize - 1); + left = SecurityDescriptorsBlockSize - next; + + /* Zero gap until SecurityDescriptorsBlockSize */ + if (left < new_sec_size) { + /* zero "left" bytes from sbi->security.next_off */ + sbi->security.next_off += SecurityDescriptorsBlockSize + left; + } + + /* Zero tail of previous security */ + //used = ni->vfs_inode.i_size & (SecurityDescriptorsBlockSize - 1); + + /* + * Example: + * 0x40438 == ni->vfs_inode.i_size + * 0x00440 == sbi->security.next_off + * need to zero [0x438-0x440) + * if (next > used) { + * u32 tozero = next - used; + * zero "tozero" bytes from sbi->security.next_off - tozero + */ + + /* format new security descriptor */ + d_security->key.hash = hash_key.hash; + d_security->key.sec_id = cpu_to_le32(sbi->security.next_id); + d_security->off = cpu_to_le64(sbi->security.next_off); + d_security->size = cpu_to_le32(new_sec_size); + memcpy(d_security + 1, sd, size_sd); + + /* Write main SDS bucket */ + err = ntfs_sb_write_run(sbi, &ni->file.run, sbi->security.next_off, + d_security, aligned_sec_size); + + if (err) + goto out; + + mirr_off = sbi->security.next_off + SecurityDescriptorsBlockSize; + new_sds_size = mirr_off + aligned_sec_size; + + if (new_sds_size > ni->vfs_inode.i_size) { + err = attr_set_size(ni, ATTR_DATA, SDS_NAME, + ARRAY_SIZE(SDS_NAME), &ni->file.run, + new_sds_size, &new_sds_size, false, NULL); + if (err) + goto out; + } + + /* Write copy SDS bucket */ + err = ntfs_sb_write_run(sbi, &ni->file.run, mirr_off, d_security, + aligned_sec_size); + if (err) + goto out; + + /* Fill SII entry */ + sii_e.de.view.data_off = + cpu_to_le16(offsetof(struct NTFS_DE_SII, sec_hdr)); + sii_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR); + sii_e.de.view.res = 0; + sii_e.de.size = cpu_to_le16(SIZEOF_SII_DIRENTRY); + sii_e.de.key_size = cpu_to_le16(sizeof(d_security->key.sec_id)); + sii_e.de.flags = 0; + sii_e.de.res = 0; + sii_e.sec_id = d_security->key.sec_id; + memcpy(&sii_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR); + + err = indx_insert_entry(indx_sii, ni, &sii_e.de, NULL, NULL); + if (err) + goto out; + + /* Fill SDH entry */ + sdh_e.de.view.data_off = + cpu_to_le16(offsetof(struct NTFS_DE_SDH, sec_hdr)); + sdh_e.de.view.data_size = cpu_to_le16(SIZEOF_SECURITY_HDR); + sdh_e.de.view.res = 0; + sdh_e.de.size = cpu_to_le16(SIZEOF_SDH_DIRENTRY); + sdh_e.de.key_size = cpu_to_le16(sizeof(sdh_e.key)); + sdh_e.de.flags = 0; + sdh_e.de.res = 0; + sdh_e.key.hash = d_security->key.hash; + sdh_e.key.sec_id = d_security->key.sec_id; + memcpy(&sdh_e.sec_hdr, d_security, SIZEOF_SECURITY_HDR); + sdh_e.magic[0] = cpu_to_le16('I'); + sdh_e.magic[1] = cpu_to_le16('I'); + + fnd_clear(fnd_sdh); + err = indx_insert_entry(indx_sdh, ni, &sdh_e.de, (void *)(size_t)1, + fnd_sdh); + if (err) + goto out; + + *security_id = d_security->key.sec_id; + if (inserted) + *inserted = true; + + /* Update Id and offset for next descriptor */ + sbi->security.next_id += 1; + sbi->security.next_off += aligned_sec_size; + +out: + fnd_put(fnd_sdh); + mark_inode_dirty(&ni->vfs_inode); + ni_unlock(ni); + ntfs_free(d_security); + + return err; +} + +/* + * ntfs_reparse_init + * + * loads and parse $Extend/$Reparse + */ +int ntfs_reparse_init(struct ntfs_sb_info *sbi) +{ + int err; + struct ntfs_inode *ni = sbi->reparse.ni; + struct ntfs_index *indx = &sbi->reparse.index_r; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + const struct INDEX_ROOT *root_r; + + if (!ni) + return 0; + + le = NULL; + attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SR_NAME, + ARRAY_SIZE(SR_NAME), NULL, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + root_r = resident_data(attr); + if (root_r->type != ATTR_ZERO || + root_r->rule != NTFS_COLLATION_TYPE_UINTS) { + err = -EINVAL; + goto out; + } + + err = indx_init(indx, sbi, attr, INDEX_MUTEX_SR); + if (err) + goto out; + +out: + return err; +} + +/* + * ntfs_objid_init + * + * loads and parse $Extend/$ObjId + */ +int ntfs_objid_init(struct ntfs_sb_info *sbi) +{ + int err; + struct ntfs_inode *ni = sbi->objid.ni; + struct ntfs_index *indx = &sbi->objid.index_o; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + const struct INDEX_ROOT *root; + + if (!ni) + return 0; + + le = NULL; + attr = ni_find_attr(ni, NULL, &le, ATTR_ROOT, SO_NAME, + ARRAY_SIZE(SO_NAME), NULL, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + root = resident_data(attr); + if (root->type != ATTR_ZERO || + root->rule != NTFS_COLLATION_TYPE_UINTS) { + err = -EINVAL; + goto out; + } + + err = indx_init(indx, sbi, attr, INDEX_MUTEX_SO); + if (err) + goto out; + +out: + return err; +} + +int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid) +{ + int err; + struct ntfs_inode *ni = sbi->objid.ni; + struct ntfs_index *indx = &sbi->objid.index_o; + + if (!ni) + return -EINVAL; + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_OBJID); + + err = indx_delete_entry(indx, ni, guid, sizeof(*guid), NULL); + + mark_inode_dirty(&ni->vfs_inode); + ni_unlock(ni); + + return err; +} + +int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag, + const struct MFT_REF *ref) +{ + int err; + struct ntfs_inode *ni = sbi->reparse.ni; + struct ntfs_index *indx = &sbi->reparse.index_r; + struct NTFS_DE_R re; + + if (!ni) + return -EINVAL; + + memset(&re, 0, sizeof(re)); + + re.de.view.data_off = cpu_to_le16(offsetof(struct NTFS_DE_R, zero)); + re.de.size = cpu_to_le16(sizeof(struct NTFS_DE_R)); + re.de.key_size = cpu_to_le16(sizeof(re.key)); + + re.key.ReparseTag = rtag; + memcpy(&re.key.ref, ref, sizeof(*ref)); + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); + + err = indx_insert_entry(indx, ni, &re.de, NULL, NULL); + + mark_inode_dirty(&ni->vfs_inode); + ni_unlock(ni); + + return err; +} + +int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, + const struct MFT_REF *ref) +{ + int err, diff; + struct ntfs_inode *ni = sbi->reparse.ni; + struct ntfs_index *indx = &sbi->reparse.index_r; + struct ntfs_fnd *fnd = NULL; + struct REPARSE_KEY rkey; + struct NTFS_DE_R *re; + struct INDEX_ROOT *root_r; + + if (!ni) + return -EINVAL; + + rkey.ReparseTag = rtag; + rkey.ref = *ref; + + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_REPARSE); + + if (rtag) { + err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); + goto out1; + } + + fnd = fnd_get(); + if (!fnd) { + err = -ENOMEM; + goto out1; + } + + root_r = indx_get_root(indx, ni, NULL, NULL); + if (!root_r) { + err = -EINVAL; + goto out; + } + + /* 1 - forces to ignore rkey.ReparseTag when comparing keys */ + err = indx_find(indx, ni, root_r, &rkey, sizeof(rkey), (void *)1, &diff, + (struct NTFS_DE **)&re, fnd); + if (err) + goto out; + + if (memcmp(&re->key.ref, ref, sizeof(*ref))) { + /* Impossible. Looks like volume corrupt?*/ + goto out; + } + + memcpy(&rkey, &re->key, sizeof(rkey)); + + fnd_put(fnd); + fnd = NULL; + + err = indx_delete_entry(indx, ni, &rkey, sizeof(rkey), NULL); + if (err) + goto out; + +out: + fnd_put(fnd); + +out1: + mark_inode_dirty(&ni->vfs_inode); + ni_unlock(ni); + + return err; +} + +static inline void ntfs_unmap_and_discard(struct ntfs_sb_info *sbi, CLST lcn, + CLST len) +{ + ntfs_unmap_meta(sbi->sb, lcn, len); + ntfs_discard(sbi, lcn, len); +} + +void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim) +{ + CLST end, i; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + + down_write_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + if (!wnd_is_used(wnd, lcn, len)) { + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + + end = lcn + len; + len = 0; + for (i = lcn; i < end; i++) { + if (wnd_is_used(wnd, i, 1)) { + if (!len) + lcn = i; + len += 1; + continue; + } + + if (!len) + continue; + + if (trim) + ntfs_unmap_and_discard(sbi, lcn, len); + + wnd_set_free(wnd, lcn, len); + len = 0; + } + + if (!len) + goto out; + } + + if (trim) + ntfs_unmap_and_discard(sbi, lcn, len); + wnd_set_free(wnd, lcn, len); + +out: + up_write(&wnd->rw_lock); +} + +/* + * run_deallocate + * + * deallocate clusters + */ +int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim) +{ + CLST lcn, len; + size_t idx = 0; + + while (run_get_entry(run, idx++, NULL, &lcn, &len)) { + if (lcn == SPARSE_LCN) + continue; + + mark_as_free_ex(sbi, lcn, len, trim); + } + + return 0; +} diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c new file mode 100644 index 000000000000..931a7241ef00 --- /dev/null +++ b/fs/ntfs3/index.c @@ -0,0 +1,2641 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +static const struct INDEX_NAMES { + const __le16 *name; + u8 name_len; +} s_index_names[INDEX_MUTEX_TOTAL] = { + { I30_NAME, ARRAY_SIZE(I30_NAME) }, { SII_NAME, ARRAY_SIZE(SII_NAME) }, + { SDH_NAME, ARRAY_SIZE(SDH_NAME) }, { SO_NAME, ARRAY_SIZE(SO_NAME) }, + { SQ_NAME, ARRAY_SIZE(SQ_NAME) }, { SR_NAME, ARRAY_SIZE(SR_NAME) }, +}; + +/* + * compare two names in index + * if l1 != 0 + * both names are little endian on-disk ATTR_FILE_NAME structs + * else + * key1 - cpu_str, key2 - ATTR_FILE_NAME + */ +static int cmp_fnames(const void *key1, size_t l1, const void *key2, size_t l2, + const void *data) +{ + const struct ATTR_FILE_NAME *f2 = key2; + const struct ntfs_sb_info *sbi = data; + const struct ATTR_FILE_NAME *f1; + u16 fsize2; + bool both_case; + + if (l2 <= offsetof(struct ATTR_FILE_NAME, name)) + return -1; + + fsize2 = fname_full_size(f2); + if (l2 < fsize2) + return -1; + + both_case = f2->type != FILE_NAME_DOS /*&& !sbi->options.nocase*/; + if (!l1) { + const struct le_str *s2 = (struct le_str *)&f2->name_len; + + /* + * If names are equal (case insensitive) + * try to compare it case sensitive + */ + return ntfs_cmp_names_cpu(key1, s2, sbi->upcase, both_case); + } + + f1 = key1; + return ntfs_cmp_names(f1->name, f1->name_len, f2->name, f2->name_len, + sbi->upcase, both_case); +} + +/* $SII of $Secure and $Q of Quota */ +static int cmp_uint(const void *key1, size_t l1, const void *key2, size_t l2, + const void *data) +{ + const u32 *k1 = key1; + const u32 *k2 = key2; + + if (l2 < sizeof(u32)) + return -1; + + if (*k1 < *k2) + return -1; + if (*k1 > *k2) + return 1; + return 0; +} + +/* $SDH of $Secure */ +static int cmp_sdh(const void *key1, size_t l1, const void *key2, size_t l2, + const void *data) +{ + const struct SECURITY_KEY *k1 = key1; + const struct SECURITY_KEY *k2 = key2; + u32 t1, t2; + + if (l2 < sizeof(struct SECURITY_KEY)) + return -1; + + t1 = le32_to_cpu(k1->hash); + t2 = le32_to_cpu(k2->hash); + + /* First value is a hash value itself */ + if (t1 < t2) + return -1; + if (t1 > t2) + return 1; + + /* Second value is security Id */ + if (data) { + t1 = le32_to_cpu(k1->sec_id); + t2 = le32_to_cpu(k2->sec_id); + if (t1 < t2) + return -1; + if (t1 > t2) + return 1; + } + + return 0; +} + +/* $O of ObjId and "$R" for Reparse */ +static int cmp_uints(const void *key1, size_t l1, const void *key2, size_t l2, + const void *data) +{ + const __le32 *k1 = key1; + const __le32 *k2 = key2; + size_t count; + + if ((size_t)data == 1) { + /* + * ni_delete_all -> ntfs_remove_reparse -> delete all with this reference + * k1, k2 - pointers to REPARSE_KEY + */ + + k1 += 1; // skip REPARSE_KEY.ReparseTag + k2 += 1; // skip REPARSE_KEY.ReparseTag + if (l2 <= sizeof(int)) + return -1; + l2 -= sizeof(int); + if (l1 <= sizeof(int)) + return 1; + l1 -= sizeof(int); + } + + if (l2 < sizeof(int)) + return -1; + + for (count = min(l1, l2) >> 2; count > 0; --count, ++k1, ++k2) { + u32 t1 = le32_to_cpu(*k1); + u32 t2 = le32_to_cpu(*k2); + + if (t1 > t2) + return 1; + if (t1 < t2) + return -1; + } + + if (l1 > l2) + return 1; + if (l1 < l2) + return -1; + + return 0; +} + +static inline NTFS_CMP_FUNC get_cmp_func(const struct INDEX_ROOT *root) +{ + switch (root->type) { + case ATTR_NAME: + if (root->rule == NTFS_COLLATION_TYPE_FILENAME) + return &cmp_fnames; + break; + case ATTR_ZERO: + switch (root->rule) { + case NTFS_COLLATION_TYPE_UINT: + return &cmp_uint; + case NTFS_COLLATION_TYPE_SECURITY_HASH: + return &cmp_sdh; + case NTFS_COLLATION_TYPE_UINTS: + return &cmp_uints; + default: + break; + } + default: + break; + } + + return NULL; +} + +struct bmp_buf { + struct ATTRIB *b; + struct mft_inode *mi; + struct buffer_head *bh; + ulong *buf; + size_t bit; + u32 nbits; + u64 new_valid; +}; + +static int bmp_buf_get(struct ntfs_index *indx, struct ntfs_inode *ni, + size_t bit, struct bmp_buf *bbuf) +{ + struct ATTRIB *b; + size_t data_size, valid_size, vbo, off = bit >> 3; + struct ntfs_sb_info *sbi = ni->mi.sbi; + CLST vcn = off >> sbi->cluster_bits; + struct ATTR_LIST_ENTRY *le = NULL; + struct buffer_head *bh; + struct super_block *sb; + u32 blocksize; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + + bbuf->bh = NULL; + + b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, + &vcn, &bbuf->mi); + bbuf->b = b; + if (!b) + return -EINVAL; + + if (!b->non_res) { + data_size = le32_to_cpu(b->res.data_size); + + if (off >= data_size) + return -EINVAL; + + bbuf->buf = (ulong *)resident_data(b); + bbuf->bit = 0; + bbuf->nbits = data_size * 8; + + return 0; + } + + data_size = le64_to_cpu(b->nres.data_size); + if (WARN_ON(off >= data_size)) { + /* looks like filesystem error */ + return -EINVAL; + } + + valid_size = le64_to_cpu(b->nres.valid_size); + + bh = ntfs_bread_run(sbi, &indx->bitmap_run, off); + if (!bh) + return -EIO; + + if (IS_ERR(bh)) + return PTR_ERR(bh); + + bbuf->bh = bh; + + if (buffer_locked(bh)) + __wait_on_buffer(bh); + + lock_buffer(bh); + + sb = sbi->sb; + blocksize = sb->s_blocksize; + + vbo = off & ~(size_t)sbi->block_mask; + + bbuf->new_valid = vbo + blocksize; + if (bbuf->new_valid <= valid_size) + bbuf->new_valid = 0; + else if (bbuf->new_valid > data_size) + bbuf->new_valid = data_size; + + if (vbo >= valid_size) { + memset(bh->b_data, 0, blocksize); + } else if (vbo + blocksize > valid_size) { + u32 voff = valid_size & sbi->block_mask; + + memset(bh->b_data + voff, 0, blocksize - voff); + } + + bbuf->buf = (ulong *)bh->b_data; + bbuf->bit = 8 * (off & ~(size_t)sbi->block_mask); + bbuf->nbits = 8 * blocksize; + + return 0; +} + +static void bmp_buf_put(struct bmp_buf *bbuf, bool dirty) +{ + struct buffer_head *bh = bbuf->bh; + struct ATTRIB *b = bbuf->b; + + if (!bh) { + if (b && !b->non_res && dirty) + bbuf->mi->dirty = true; + return; + } + + if (!dirty) + goto out; + + if (bbuf->new_valid) { + b->nres.valid_size = cpu_to_le64(bbuf->new_valid); + bbuf->mi->dirty = true; + } + + set_buffer_uptodate(bh); + mark_buffer_dirty(bh); + +out: + unlock_buffer(bh); + put_bh(bh); +} + +/* + * indx_mark_used + * + * marks the bit 'bit' as used + */ +static int indx_mark_used(struct ntfs_index *indx, struct ntfs_inode *ni, + size_t bit) +{ + int err; + struct bmp_buf bbuf; + + err = bmp_buf_get(indx, ni, bit, &bbuf); + if (err) + return err; + + __set_bit(bit - bbuf.bit, bbuf.buf); + + bmp_buf_put(&bbuf, true); + + return 0; +} + +/* + * indx_mark_free + * + * the bit 'bit' as free + */ +static int indx_mark_free(struct ntfs_index *indx, struct ntfs_inode *ni, + size_t bit) +{ + int err; + struct bmp_buf bbuf; + + err = bmp_buf_get(indx, ni, bit, &bbuf); + if (err) + return err; + + __clear_bit(bit - bbuf.bit, bbuf.buf); + + bmp_buf_put(&bbuf, true); + + return 0; +} + +/* + * if ntfs_readdir calls this function (indx_used_bit -> scan_nres_bitmap), + * inode is shared locked and no ni_lock + * use rw_semaphore for read/write access to bitmap_run + */ +static int scan_nres_bitmap(struct ntfs_inode *ni, struct ATTRIB *bitmap, + struct ntfs_index *indx, size_t from, + bool (*fn)(const ulong *buf, u32 bit, u32 bits, + size_t *ret), + size_t *ret) +{ + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct super_block *sb = sbi->sb; + struct runs_tree *run = &indx->bitmap_run; + struct rw_semaphore *lock = &indx->run_lock; + u32 nbits = sb->s_blocksize * 8; + u32 blocksize = sb->s_blocksize; + u64 valid_size = le64_to_cpu(bitmap->nres.valid_size); + u64 data_size = le64_to_cpu(bitmap->nres.data_size); + sector_t eblock = bytes_to_block(sb, data_size); + size_t vbo = from >> 3; + sector_t blk = (vbo & sbi->cluster_mask) >> sb->s_blocksize_bits; + sector_t vblock = vbo >> sb->s_blocksize_bits; + sector_t blen, block; + CLST lcn, clen, vcn, vcn_next; + size_t idx; + struct buffer_head *bh; + bool ok; + + *ret = MINUS_ONE_T; + + if (vblock >= eblock) + return 0; + + from &= nbits - 1; + vcn = vbo >> sbi->cluster_bits; + + down_read(lock); + ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + up_read(lock); + +next_run: + if (!ok) { + int err; + const struct INDEX_NAMES *name = &s_index_names[indx->type]; + + down_write(lock); + err = attr_load_runs_vcn(ni, ATTR_BITMAP, name->name, + name->name_len, run, vcn); + up_write(lock); + if (err) + return err; + down_read(lock); + ok = run_lookup_entry(run, vcn, &lcn, &clen, &idx); + up_read(lock); + if (!ok) + return -EINVAL; + } + + blen = (sector_t)clen * sbi->blocks_per_cluster; + block = (sector_t)lcn * sbi->blocks_per_cluster; + + for (; blk < blen; blk++, from = 0) { + bh = ntfs_bread(sb, block + blk); + if (!bh) + return -EIO; + + vbo = (u64)vblock << sb->s_blocksize_bits; + if (vbo >= valid_size) { + memset(bh->b_data, 0, blocksize); + } else if (vbo + blocksize > valid_size) { + u32 voff = valid_size & sbi->block_mask; + + memset(bh->b_data + voff, 0, blocksize - voff); + } + + if (vbo + blocksize > data_size) + nbits = 8 * (data_size - vbo); + + ok = nbits > from ? (*fn)((ulong *)bh->b_data, from, nbits, ret) + : false; + put_bh(bh); + + if (ok) { + *ret += 8 * vbo; + return 0; + } + + if (++vblock >= eblock) { + *ret = MINUS_ONE_T; + return 0; + } + } + blk = 0; + vcn_next = vcn + clen; + down_read(lock); + ok = run_get_entry(run, ++idx, &vcn, &lcn, &clen) && vcn == vcn_next; + if (!ok) + vcn = vcn_next; + up_read(lock); + goto next_run; +} + +static bool scan_for_free(const ulong *buf, u32 bit, u32 bits, size_t *ret) +{ + size_t pos = find_next_zero_bit(buf, bits, bit); + + if (pos >= bits) + return false; + *ret = pos; + return true; +} + +/* + * indx_find_free + * + * looks for free bit + * returns -1 if no free bits + */ +static int indx_find_free(struct ntfs_index *indx, struct ntfs_inode *ni, + size_t *bit, struct ATTRIB **bitmap) +{ + struct ATTRIB *b; + struct ATTR_LIST_ENTRY *le = NULL; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + int err; + + b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, + NULL, NULL); + + if (!b) + return -ENOENT; + + *bitmap = b; + *bit = MINUS_ONE_T; + + if (!b->non_res) { + u32 nbits = 8 * le32_to_cpu(b->res.data_size); + size_t pos = find_next_zero_bit(resident_data(b), nbits, 0); + + if (pos < nbits) + *bit = pos; + } else { + err = scan_nres_bitmap(ni, b, indx, 0, &scan_for_free, bit); + + if (err) + return err; + } + + return 0; +} + +static bool scan_for_used(const ulong *buf, u32 bit, u32 bits, size_t *ret) +{ + size_t pos = find_next_bit(buf, bits, bit); + + if (pos >= bits) + return false; + *ret = pos; + return true; +} + +/* + * indx_used_bit + * + * looks for used bit + * returns MINUS_ONE_T if no used bits + */ +int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit) +{ + struct ATTRIB *b; + struct ATTR_LIST_ENTRY *le = NULL; + size_t from = *bit; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + int err; + + b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, + NULL, NULL); + + if (!b) + return -ENOENT; + + *bit = MINUS_ONE_T; + + if (!b->non_res) { + u32 nbits = le32_to_cpu(b->res.data_size) * 8; + size_t pos = find_next_bit(resident_data(b), nbits, from); + + if (pos < nbits) + *bit = pos; + } else { + err = scan_nres_bitmap(ni, b, indx, from, &scan_for_used, bit); + if (err) + return err; + } + + return 0; +} + +/* + * hdr_find_split + * + * finds a point at which the index allocation buffer would like to + * be split. + * NOTE: This function should never return 'END' entry NULL returns on error + */ +static const struct NTFS_DE *hdr_find_split(const struct INDEX_HDR *hdr) +{ + size_t o; + const struct NTFS_DE *e = hdr_first_de(hdr); + u32 used_2 = le32_to_cpu(hdr->used) >> 1; + u16 esize = le16_to_cpu(e->size); + + if (!e || de_is_last(e)) + return NULL; + + for (o = le32_to_cpu(hdr->de_off) + esize; o < used_2; o += esize) { + const struct NTFS_DE *p = e; + + e = Add2Ptr(hdr, o); + + /* We must not return END entry */ + if (de_is_last(e)) + return p; + + esize = le16_to_cpu(e->size); + } + + return e; +} + +/* + * hdr_insert_head + * + * inserts some entries at the beginning of the buffer. + * It is used to insert entries into a newly-created buffer. + */ +static const struct NTFS_DE *hdr_insert_head(struct INDEX_HDR *hdr, + const void *ins, u32 ins_bytes) +{ + u32 to_move; + struct NTFS_DE *e = hdr_first_de(hdr); + u32 used = le32_to_cpu(hdr->used); + + if (!e) + return NULL; + + /* Now we just make room for the inserted entries and jam it in. */ + to_move = used - le32_to_cpu(hdr->de_off); + memmove(Add2Ptr(e, ins_bytes), e, to_move); + memcpy(e, ins, ins_bytes); + hdr->used = cpu_to_le32(used + ins_bytes); + + return e; +} + +void fnd_clear(struct ntfs_fnd *fnd) +{ + int i; + + for (i = 0; i < fnd->level; i++) { + struct indx_node *n = fnd->nodes[i]; + + if (!n) + continue; + + put_indx_node(n); + fnd->nodes[i] = NULL; + } + fnd->level = 0; + fnd->root_de = NULL; +} + +static int fnd_push(struct ntfs_fnd *fnd, struct indx_node *n, + struct NTFS_DE *e) +{ + int i; + + i = fnd->level; + if (i < 0 || i >= ARRAY_SIZE(fnd->nodes)) + return -EINVAL; + fnd->nodes[i] = n; + fnd->de[i] = e; + fnd->level += 1; + return 0; +} + +static struct indx_node *fnd_pop(struct ntfs_fnd *fnd) +{ + struct indx_node *n; + int i = fnd->level; + + i -= 1; + n = fnd->nodes[i]; + fnd->nodes[i] = NULL; + fnd->level = i; + + return n; +} + +static bool fnd_is_empty(struct ntfs_fnd *fnd) +{ + if (!fnd->level) + return !fnd->root_de; + + return !fnd->de[fnd->level - 1]; +} + +/* + * hdr_find_e + * + * locates an entry the index buffer. + * If no matching entry is found, it returns the first entry which is greater + * than the desired entry If the search key is greater than all the entries the + * buffer, it returns the 'end' entry. This function does a binary search of the + * current index buffer, for the first entry that is <= to the search value + * Returns NULL if error + */ +static struct NTFS_DE *hdr_find_e(const struct ntfs_index *indx, + const struct INDEX_HDR *hdr, const void *key, + size_t key_len, const void *ctx, int *diff) +{ + struct NTFS_DE *e; + NTFS_CMP_FUNC cmp = indx->cmp; + u32 e_size, e_key_len; + u32 end = le32_to_cpu(hdr->used); + u32 off = le32_to_cpu(hdr->de_off); + +#ifdef NTFS3_INDEX_BINARY_SEARCH + int max_idx = 0, fnd, min_idx; + int nslots = 64; + u16 *offs; + + if (end > 0x10000) + goto next; + + offs = ntfs_malloc(sizeof(u16) * nslots); + if (!offs) + goto next; + + /* use binary search algorithm */ +next1: + if (off + sizeof(struct NTFS_DE) > end) { + e = NULL; + goto out1; + } + e = Add2Ptr(hdr, off); + e_size = le16_to_cpu(e->size); + + if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) { + e = NULL; + goto out1; + } + + if (max_idx >= nslots) { + u16 *ptr; + int new_slots = QuadAlign(2 * nslots); + + ptr = ntfs_malloc(sizeof(u16) * new_slots); + if (ptr) + memcpy(ptr, offs, sizeof(u16) * max_idx); + ntfs_free(offs); + offs = ptr; + nslots = new_slots; + if (!ptr) + goto next; + } + + /* Store entry table */ + offs[max_idx] = off; + + if (!de_is_last(e)) { + off += e_size; + max_idx += 1; + goto next1; + } + + /* + * Table of pointers is created + * Use binary search to find entry that is <= to the search value + */ + fnd = -1; + min_idx = 0; + + while (min_idx <= max_idx) { + int mid_idx = min_idx + ((max_idx - min_idx) >> 1); + int diff2; + + e = Add2Ptr(hdr, offs[mid_idx]); + + e_key_len = le16_to_cpu(e->key_size); + + diff2 = (*cmp)(key, key_len, e + 1, e_key_len, ctx); + + if (!diff2) { + *diff = 0; + goto out1; + } + + if (diff2 < 0) { + max_idx = mid_idx - 1; + fnd = mid_idx; + if (!fnd) + break; + } else { + min_idx = mid_idx + 1; + } + } + + if (fnd == -1) { + e = NULL; + goto out1; + } + + *diff = -1; + e = Add2Ptr(hdr, offs[fnd]); + +out1: + ntfs_free(offs); + + return e; +#endif + +next: + /* + * Entries index are sorted + * Enumerate all entries until we find entry that is <= to the search value + */ + if (off + sizeof(struct NTFS_DE) > end) + return NULL; + + e = Add2Ptr(hdr, off); + e_size = le16_to_cpu(e->size); + + if (e_size < sizeof(struct NTFS_DE) || off + e_size > end) + return NULL; + + off += e_size; + + e_key_len = le16_to_cpu(e->key_size); + + *diff = (*cmp)(key, key_len, e + 1, e_key_len, ctx); + if (!*diff) + return e; + + if (*diff <= 0) + return e; + + if (de_is_last(e)) { + *diff = 1; + return e; + } + goto next; +} + +/* + * hdr_insert_de + * + * inserts an index entry into the buffer. + * 'before' should be a pointer previously returned from hdr_find_e + */ +static struct NTFS_DE *hdr_insert_de(const struct ntfs_index *indx, + struct INDEX_HDR *hdr, + const struct NTFS_DE *de, + struct NTFS_DE *before, const void *ctx) +{ + int diff; + size_t off = PtrOffset(hdr, before); + u32 used = le32_to_cpu(hdr->used); + u32 total = le32_to_cpu(hdr->total); + u16 de_size = le16_to_cpu(de->size); + + /* First, check to see if there's enough room */ + if (used + de_size > total) + return NULL; + + /* We know there's enough space, so we know we'll succeed. */ + if (before) { + /* Check that before is inside Index */ + if (off >= used || off < le32_to_cpu(hdr->de_off) || + off + le16_to_cpu(before->size) > total) { + return NULL; + } + goto ok; + } + /* No insert point is applied. Get it manually */ + before = hdr_find_e(indx, hdr, de + 1, le16_to_cpu(de->key_size), ctx, + &diff); + if (!before) + return NULL; + off = PtrOffset(hdr, before); + +ok: + /* Now we just make room for the entry and jam it in. */ + memmove(Add2Ptr(before, de_size), before, used - off); + + hdr->used = cpu_to_le32(used + de_size); + memcpy(before, de, de_size); + + return before; +} + +/* + * hdr_delete_de + * + * removes an entry from the index buffer + */ +static inline struct NTFS_DE *hdr_delete_de(struct INDEX_HDR *hdr, + struct NTFS_DE *re) +{ + u32 used = le32_to_cpu(hdr->used); + u16 esize = le16_to_cpu(re->size); + u32 off = PtrOffset(hdr, re); + int bytes = used - (off + esize); + + if (off >= used || esize < sizeof(struct NTFS_DE) || + bytes < sizeof(struct NTFS_DE)) + return NULL; + + hdr->used = cpu_to_le32(used - esize); + memmove(re, Add2Ptr(re, esize), bytes); + + return re; +} + +void indx_clear(struct ntfs_index *indx) +{ + run_close(&indx->alloc_run); + run_close(&indx->bitmap_run); +} + +int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, + const struct ATTRIB *attr, enum index_mutex_classed type) +{ + u32 t32; + const struct INDEX_ROOT *root = resident_data(attr); + + /* Check root fields */ + if (!root->index_block_clst) + return -EINVAL; + + indx->type = type; + indx->idx2vbn_bits = __ffs(root->index_block_clst); + + t32 = le32_to_cpu(root->index_block_size); + indx->index_bits = blksize_bits(t32); + + /* Check index record size */ + if (t32 < sbi->cluster_size) { + /* index record is smaller than a cluster, use 512 blocks */ + if (t32 != root->index_block_clst * SECTOR_SIZE) + return -EINVAL; + + /* Check alignment to a cluster */ + if ((sbi->cluster_size >> SECTOR_SHIFT) & + (root->index_block_clst - 1)) { + return -EINVAL; + } + + indx->vbn2vbo_bits = SECTOR_SHIFT; + } else { + /* index record must be a multiple of cluster size */ + if (t32 != root->index_block_clst << sbi->cluster_bits) + return -EINVAL; + + indx->vbn2vbo_bits = sbi->cluster_bits; + } + + init_rwsem(&indx->run_lock); + + indx->cmp = get_cmp_func(root); + return indx->cmp ? 0 : -EINVAL; +} + +static struct indx_node *indx_new(struct ntfs_index *indx, + struct ntfs_inode *ni, CLST vbn, + const __le64 *sub_vbn) +{ + int err; + struct NTFS_DE *e; + struct indx_node *r; + struct INDEX_HDR *hdr; + struct INDEX_BUFFER *index; + u64 vbo = (u64)vbn << indx->vbn2vbo_bits; + u32 bytes = 1u << indx->index_bits; + u16 fn; + u32 eo; + + r = ntfs_zalloc(sizeof(struct indx_node)); + if (!r) + return ERR_PTR(-ENOMEM); + + index = ntfs_zalloc(bytes); + if (!index) { + ntfs_free(r); + return ERR_PTR(-ENOMEM); + } + + err = ntfs_get_bh(ni->mi.sbi, &indx->alloc_run, vbo, bytes, &r->nb); + + if (err) { + ntfs_free(index); + ntfs_free(r); + return ERR_PTR(err); + } + + /* Create header */ + index->rhdr.sign = NTFS_INDX_SIGNATURE; + index->rhdr.fix_off = cpu_to_le16(sizeof(struct INDEX_BUFFER)); // 0x28 + fn = (bytes >> SECTOR_SHIFT) + 1; // 9 + index->rhdr.fix_num = cpu_to_le16(fn); + index->vbn = cpu_to_le64(vbn); + hdr = &index->ihdr; + eo = QuadAlign(sizeof(struct INDEX_BUFFER) + fn * sizeof(short)); + hdr->de_off = cpu_to_le32(eo); + + e = Add2Ptr(hdr, eo); + + if (sub_vbn) { + e->flags = NTFS_IE_LAST | NTFS_IE_HAS_SUBNODES; + e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); + hdr->used = + cpu_to_le32(eo + sizeof(struct NTFS_DE) + sizeof(u64)); + de_set_vbn_le(e, *sub_vbn); + hdr->flags = 1; + } else { + e->size = cpu_to_le16(sizeof(struct NTFS_DE)); + hdr->used = cpu_to_le32(eo + sizeof(struct NTFS_DE)); + e->flags = NTFS_IE_LAST; + } + + hdr->total = cpu_to_le32(bytes - offsetof(struct INDEX_BUFFER, ihdr)); + + r->index = index; + return r; +} + +struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, + struct ATTRIB **attr, struct mft_inode **mi) +{ + struct ATTR_LIST_ENTRY *le = NULL; + struct ATTRIB *a; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + + a = ni_find_attr(ni, NULL, &le, ATTR_ROOT, in->name, in->name_len, NULL, + mi); + if (!a) + return NULL; + + if (attr) + *attr = a; + + return resident_data_ex(a, sizeof(struct INDEX_ROOT)); +} + +static int indx_write(struct ntfs_index *indx, struct ntfs_inode *ni, + struct indx_node *node, int sync) +{ + struct INDEX_BUFFER *ib = node->index; + + return ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &node->nb, sync); +} + +/* + * if ntfs_readdir calls this function + * inode is shared locked and no ni_lock + * use rw_semaphore for read/write access to alloc_run + */ +int indx_read(struct ntfs_index *indx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node) +{ + int err; + struct INDEX_BUFFER *ib; + struct runs_tree *run = &indx->alloc_run; + struct rw_semaphore *lock = &indx->run_lock; + u64 vbo = (u64)vbn << indx->vbn2vbo_bits; + u32 bytes = 1u << indx->index_bits; + struct indx_node *in = *node; + const struct INDEX_NAMES *name; + + if (!in) { + in = ntfs_zalloc(sizeof(struct indx_node)); + if (!in) + return -ENOMEM; + } else { + nb_put(&in->nb); + } + + ib = in->index; + if (!ib) { + ib = ntfs_malloc(bytes); + if (!ib) { + err = -ENOMEM; + goto out; + } + } + + down_read(lock); + err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + up_read(lock); + if (!err) + goto ok; + + if (err == -E_NTFS_FIXUP) + goto ok; + + if (err != -ENOENT) + goto out; + + name = &s_index_names[indx->type]; + down_write(lock); + err = attr_load_runs_range(ni, ATTR_ALLOC, name->name, name->name_len, + run, vbo, vbo + bytes); + up_write(lock); + if (err) + goto out; + + down_read(lock); + err = ntfs_read_bh(ni->mi.sbi, run, vbo, &ib->rhdr, bytes, &in->nb); + up_read(lock); + if (err == -E_NTFS_FIXUP) + goto ok; + + if (err) + goto out; + +ok: + if (err == -E_NTFS_FIXUP) { + ntfs_write_bh(ni->mi.sbi, &ib->rhdr, &in->nb, 0); + err = 0; + } + + in->index = ib; + *node = in; + +out: + if (ib != in->index) + ntfs_free(ib); + + if (*node != in) { + nb_put(&in->nb); + ntfs_free(in); + } + + return err; +} + +/* + * indx_find + * + * scans NTFS directory for given entry + */ +int indx_find(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct INDEX_ROOT *root, const void *key, size_t key_len, + const void *ctx, int *diff, struct NTFS_DE **entry, + struct ntfs_fnd *fnd) +{ + int err; + struct NTFS_DE *e; + const struct INDEX_HDR *hdr; + struct indx_node *node; + + if (!root) + root = indx_get_root(&ni->dir, ni, NULL, NULL); + + if (!root) { + err = -EINVAL; + goto out; + } + + hdr = &root->ihdr; + + /* Check cache */ + e = fnd->level ? fnd->de[fnd->level - 1] : fnd->root_de; + if (e && !de_is_last(e) && + !(*indx->cmp)(key, key_len, e + 1, le16_to_cpu(e->key_size), ctx)) { + *entry = e; + *diff = 0; + return 0; + } + + /* Soft finder reset */ + fnd_clear(fnd); + + /* Lookup entry that is <= to the search value */ + e = hdr_find_e(indx, hdr, key, key_len, ctx, diff); + if (!e) + return -EINVAL; + + if (fnd) + fnd->root_de = e; + + err = 0; + + for (;;) { + node = NULL; + if (*diff >= 0 || !de_has_vcn_ex(e)) { + *entry = e; + goto out; + } + + /* Read next level. */ + err = indx_read(indx, ni, de_get_vbn(e), &node); + if (err) + goto out; + + /* Lookup entry that is <= to the search value */ + e = hdr_find_e(indx, &node->index->ihdr, key, key_len, ctx, + diff); + if (!e) { + err = -EINVAL; + put_indx_node(node); + goto out; + } + + fnd_push(fnd, node, e); + } + +out: + return err; +} + +int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct INDEX_ROOT *root, struct NTFS_DE **entry, + struct ntfs_fnd *fnd) +{ + int err; + struct indx_node *n = NULL; + struct NTFS_DE *e; + size_t iter = 0; + int level = fnd->level; + + if (!*entry) { + /* Start find */ + e = hdr_first_de(&root->ihdr); + if (!e) + return 0; + fnd_clear(fnd); + fnd->root_de = e; + } else if (!level) { + if (de_is_last(fnd->root_de)) { + *entry = NULL; + return 0; + } + + e = hdr_next_de(&root->ihdr, fnd->root_de); + if (!e) + return -EINVAL; + fnd->root_de = e; + } else { + n = fnd->nodes[level - 1]; + e = fnd->de[level - 1]; + + if (de_is_last(e)) + goto pop_level; + + e = hdr_next_de(&n->index->ihdr, e); + if (!e) + return -EINVAL; + + fnd->de[level - 1] = e; + } + + /* Just to avoid tree cycle */ +next_iter: + if (iter++ >= 1000) + return -EINVAL; + + while (de_has_vcn_ex(e)) { + if (le16_to_cpu(e->size) < + sizeof(struct NTFS_DE) + sizeof(u64)) { + if (n) { + fnd_pop(fnd); + ntfs_free(n); + } + return -EINVAL; + } + + /* Read next level */ + err = indx_read(indx, ni, de_get_vbn(e), &n); + if (err) + return err; + + /* Try next level */ + e = hdr_first_de(&n->index->ihdr); + if (!e) { + ntfs_free(n); + return -EINVAL; + } + + fnd_push(fnd, n, e); + } + + if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) { + *entry = e; + return 0; + } + +pop_level: + for (;;) { + if (!de_is_last(e)) + goto next_iter; + + /* Pop one level */ + if (n) { + fnd_pop(fnd); + ntfs_free(n); + } + + level = fnd->level; + + if (level) { + n = fnd->nodes[level - 1]; + e = fnd->de[level - 1]; + } else if (fnd->root_de) { + n = NULL; + e = fnd->root_de; + fnd->root_de = NULL; + } else { + *entry = NULL; + return 0; + } + + if (le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) { + *entry = e; + if (!fnd->root_de) + fnd->root_de = e; + return 0; + } + } +} + +int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct INDEX_ROOT *root, struct NTFS_DE **entry, + size_t *off, struct ntfs_fnd *fnd) +{ + int err; + struct indx_node *n = NULL; + struct NTFS_DE *e = NULL; + struct NTFS_DE *e2; + size_t bit; + CLST next_used_vbn; + CLST next_vbn; + u32 record_size = ni->mi.sbi->record_size; + + /* Use non sorted algorithm */ + if (!*entry) { + /* This is the first call */ + e = hdr_first_de(&root->ihdr); + if (!e) + return 0; + fnd_clear(fnd); + fnd->root_de = e; + + /* The first call with setup of initial element */ + if (*off >= record_size) { + next_vbn = (((*off - record_size) >> indx->index_bits)) + << indx->idx2vbn_bits; + /* jump inside cycle 'for'*/ + goto next; + } + + /* Start enumeration from root */ + *off = 0; + } else if (!fnd->root_de) + return -EINVAL; + + for (;;) { + /* Check if current entry can be used */ + if (e && le16_to_cpu(e->size) > sizeof(struct NTFS_DE)) + goto ok; + + if (!fnd->level) { + /* Continue to enumerate root */ + if (!de_is_last(fnd->root_de)) { + e = hdr_next_de(&root->ihdr, fnd->root_de); + if (!e) + return -EINVAL; + fnd->root_de = e; + continue; + } + + /* Start to enumerate indexes from 0 */ + next_vbn = 0; + } else { + /* Continue to enumerate indexes */ + e2 = fnd->de[fnd->level - 1]; + + n = fnd->nodes[fnd->level - 1]; + + if (!de_is_last(e2)) { + e = hdr_next_de(&n->index->ihdr, e2); + if (!e) + return -EINVAL; + fnd->de[fnd->level - 1] = e; + continue; + } + + /* Continue with next index */ + next_vbn = le64_to_cpu(n->index->vbn) + + root->index_block_clst; + } + +next: + /* Release current index */ + if (n) { + fnd_pop(fnd); + put_indx_node(n); + n = NULL; + } + + /* Skip all free indexes */ + bit = next_vbn >> indx->idx2vbn_bits; + err = indx_used_bit(indx, ni, &bit); + if (err == -ENOENT || bit == MINUS_ONE_T) { + /* No used indexes */ + *entry = NULL; + return 0; + } + + next_used_vbn = bit << indx->idx2vbn_bits; + + /* Read buffer into memory */ + err = indx_read(indx, ni, next_used_vbn, &n); + if (err) + return err; + + e = hdr_first_de(&n->index->ihdr); + fnd_push(fnd, n, e); + if (!e) + return -EINVAL; + } + +ok: + /* return offset to restore enumerator if necessary */ + if (!n) { + /* 'e' points in root */ + *off = PtrOffset(&root->ihdr, e); + } else { + /* 'e' points in index */ + *off = (le64_to_cpu(n->index->vbn) << indx->vbn2vbo_bits) + + record_size + PtrOffset(&n->index->ihdr, e); + } + + *entry = e; + return 0; +} + +/* + * indx_create_allocate + * + * create "Allocation + Bitmap" attributes + */ +static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, + CLST *vbn) +{ + int err = -ENOMEM; + struct ntfs_sb_info *sbi = ni->mi.sbi; + struct ATTRIB *bitmap; + struct ATTRIB *alloc; + u32 data_size = 1u << indx->index_bits; + u32 alloc_size = ntfs_up_cluster(sbi, data_size); + CLST len = alloc_size >> sbi->cluster_bits; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + CLST alen; + struct runs_tree run; + + run_init(&run); + + err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, 0, &alen, 0, + NULL); + if (err) + goto out; + + err = ni_insert_nonresident(ni, ATTR_ALLOC, in->name, in->name_len, + &run, 0, len, 0, &alloc, NULL); + if (err) + goto out1; + + alloc->nres.valid_size = alloc->nres.data_size = cpu_to_le64(data_size); + + err = ni_insert_resident(ni, bitmap_size(1), ATTR_BITMAP, in->name, + in->name_len, &bitmap, NULL); + if (err) + goto out2; + + if (in->name == I30_NAME) { + ni->vfs_inode.i_size = data_size; + inode_set_bytes(&ni->vfs_inode, alloc_size); + } + + memcpy(&indx->alloc_run, &run, sizeof(run)); + + *vbn = 0; + + return 0; + +out2: + mi_remove_attr(&ni->mi, alloc); + +out1: + run_deallocate(sbi, &run, false); + +out: + return err; +} + +/* + * indx_add_allocate + * + * add clusters to index + */ +static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, + CLST *vbn) +{ + int err; + size_t bit; + u64 data_size; + u64 bmp_size, bmp_size_v; + struct ATTRIB *bmp, *alloc; + struct mft_inode *mi; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + + err = indx_find_free(indx, ni, &bit, &bmp); + if (err) + goto out1; + + if (bit != MINUS_ONE_T) { + bmp = NULL; + } else { + if (bmp->non_res) { + bmp_size = le64_to_cpu(bmp->nres.data_size); + bmp_size_v = le64_to_cpu(bmp->nres.valid_size); + } else { + bmp_size = bmp_size_v = le32_to_cpu(bmp->res.data_size); + } + + bit = bmp_size << 3; + } + + data_size = (u64)(bit + 1) << indx->index_bits; + + if (bmp) { + /* Increase bitmap */ + err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, + &indx->bitmap_run, bitmap_size(bit + 1), + NULL, true, NULL); + if (err) + goto out1; + } + + alloc = ni_find_attr(ni, NULL, NULL, ATTR_ALLOC, in->name, in->name_len, + NULL, &mi); + if (!alloc) { + if (bmp) + goto out2; + goto out1; + } + + /* Increase allocation */ + err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, + &indx->alloc_run, data_size, &data_size, true, + NULL); + if (err) { + if (bmp) + goto out2; + goto out1; + } + + *vbn = bit << indx->idx2vbn_bits; + + return 0; + +out2: + /* Ops (no space?) */ + attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, + &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL); + +out1: + return err; +} + +/* + * indx_insert_into_root + * + * attempts to insert an entry into the index root + * If necessary, it will twiddle the index b-tree. + */ +static int indx_insert_into_root(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct NTFS_DE *new_de, + struct NTFS_DE *root_de, const void *ctx, + struct ntfs_fnd *fnd) +{ + int err = 0; + struct NTFS_DE *e, *e0, *re; + struct mft_inode *mi; + struct ATTRIB *attr; + struct MFT_REC *rec; + struct INDEX_HDR *hdr; + struct indx_node *n; + CLST new_vbn; + __le64 *sub_vbn, t_vbn; + u16 new_de_size; + u32 hdr_used, hdr_total, asize, used, to_move; + u32 root_size, new_root_size; + struct ntfs_sb_info *sbi; + int ds_root; + struct INDEX_ROOT *root, *a_root = NULL; + + /* Get the record this root placed in */ + root = indx_get_root(indx, ni, &attr, &mi); + if (!root) + goto out; + + /* + * Try easy case: + * hdr_insert_de will succeed if there's room the root for the new entry. + */ + hdr = &root->ihdr; + sbi = ni->mi.sbi; + rec = mi->mrec; + used = le32_to_cpu(rec->used); + new_de_size = le16_to_cpu(new_de->size); + hdr_used = le32_to_cpu(hdr->used); + hdr_total = le32_to_cpu(hdr->total); + asize = le32_to_cpu(attr->size); + root_size = le32_to_cpu(attr->res.data_size); + + ds_root = new_de_size + hdr_used - hdr_total; + + if (used + ds_root < sbi->max_bytes_per_attr) { + /* make a room for new elements */ + mi_resize_attr(mi, attr, ds_root); + hdr->total = cpu_to_le32(hdr_total + ds_root); + e = hdr_insert_de(indx, hdr, new_de, root_de, ctx); + WARN_ON(!e); + fnd_clear(fnd); + fnd->root_de = e; + + return 0; + } + + /* Make a copy of root attribute to restore if error */ + a_root = ntfs_memdup(attr, asize); + if (!a_root) { + err = -ENOMEM; + goto out; + } + + /* copy all the non-end entries from the index root to the new buffer.*/ + to_move = 0; + e0 = hdr_first_de(hdr); + + /* Calculate the size to copy */ + for (e = e0;; e = hdr_next_de(hdr, e)) { + if (!e) { + err = -EINVAL; + goto out; + } + + if (de_is_last(e)) + break; + to_move += le16_to_cpu(e->size); + } + + n = NULL; + if (!to_move) { + re = NULL; + } else { + re = ntfs_memdup(e0, to_move); + if (!re) { + err = -ENOMEM; + goto out; + } + } + + sub_vbn = NULL; + if (de_has_vcn(e)) { + t_vbn = de_get_vbn_le(e); + sub_vbn = &t_vbn; + } + + new_root_size = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE) + + sizeof(u64); + ds_root = new_root_size - root_size; + + if (ds_root > 0 && used + ds_root > sbi->max_bytes_per_attr) { + /* make root external */ + err = -EOPNOTSUPP; + goto out; + } + + if (ds_root) + mi_resize_attr(mi, attr, ds_root); + + /* Fill first entry (vcn will be set later) */ + e = (struct NTFS_DE *)(root + 1); + memset(e, 0, sizeof(struct NTFS_DE)); + e->size = cpu_to_le16(sizeof(struct NTFS_DE) + sizeof(u64)); + e->flags = NTFS_IE_HAS_SUBNODES | NTFS_IE_LAST; + + hdr->flags = 1; + hdr->used = hdr->total = + cpu_to_le32(new_root_size - offsetof(struct INDEX_ROOT, ihdr)); + + fnd->root_de = hdr_first_de(hdr); + mi->dirty = true; + + /* Create alloc and bitmap attributes (if not) */ + err = run_is_empty(&indx->alloc_run) + ? indx_create_allocate(indx, ni, &new_vbn) + : indx_add_allocate(indx, ni, &new_vbn); + + /* layout of record may be changed, so rescan root */ + root = indx_get_root(indx, ni, &attr, &mi); + if (!root) { + /* bug? */ + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + err = -EINVAL; + goto out1; + } + + if (err) { + /* restore root */ + if (mi_resize_attr(mi, attr, -ds_root)) + memcpy(attr, a_root, asize); + else { + /* bug? */ + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + } + goto out1; + } + + e = (struct NTFS_DE *)(root + 1); + *(__le64 *)(e + 1) = cpu_to_le64(new_vbn); + mi->dirty = true; + + /* now we can create/format the new buffer and copy the entries into */ + n = indx_new(indx, ni, new_vbn, sub_vbn); + if (IS_ERR(n)) { + err = PTR_ERR(n); + goto out1; + } + + hdr = &n->index->ihdr; + hdr_used = le32_to_cpu(hdr->used); + hdr_total = le32_to_cpu(hdr->total); + + /* Copy root entries into new buffer */ + hdr_insert_head(hdr, re, to_move); + + /* Update bitmap attribute */ + indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits); + + /* Check if we can insert new entry new index buffer */ + if (hdr_used + new_de_size > hdr_total) { + /* + * This occurs if mft record is the same or bigger than index + * buffer. Move all root new index and have no space to add + * new entry classic case when mft record is 1K and index + * buffer 4K the problem should not occurs + */ + ntfs_free(re); + indx_write(indx, ni, n, 0); + + put_indx_node(n); + fnd_clear(fnd); + err = indx_insert_entry(indx, ni, new_de, ctx, fnd); + goto out; + } + + /* + * Now root is a parent for new index buffer + * Insert NewEntry a new buffer + */ + e = hdr_insert_de(indx, hdr, new_de, NULL, ctx); + if (!e) { + err = -EINVAL; + goto out1; + } + fnd_push(fnd, n, e); + + /* Just write updates index into disk */ + indx_write(indx, ni, n, 0); + + n = NULL; + +out1: + ntfs_free(re); + if (n) + put_indx_node(n); + +out: + ntfs_free(a_root); + return err; +} + +/* + * indx_insert_into_buffer + * + * attempts to insert an entry into an Index Allocation Buffer. + * If necessary, it will split the buffer. + */ +static int +indx_insert_into_buffer(struct ntfs_index *indx, struct ntfs_inode *ni, + struct INDEX_ROOT *root, const struct NTFS_DE *new_de, + const void *ctx, int level, struct ntfs_fnd *fnd) +{ + int err; + const struct NTFS_DE *sp; + struct NTFS_DE *e, *de_t, *up_e = NULL; + struct indx_node *n2 = NULL; + struct indx_node *n1 = fnd->nodes[level]; + struct INDEX_HDR *hdr1 = &n1->index->ihdr; + struct INDEX_HDR *hdr2; + u32 to_copy, used; + CLST new_vbn; + __le64 t_vbn, *sub_vbn; + u16 sp_size; + + /* Try the most easy case */ + e = fnd->level - 1 == level ? fnd->de[level] : NULL; + e = hdr_insert_de(indx, hdr1, new_de, e, ctx); + fnd->de[level] = e; + if (e) { + /* Just write updated index into disk */ + indx_write(indx, ni, n1, 0); + return 0; + } + + /* + * No space to insert into buffer. Split it. + * To split we: + * - Save split point ('cause index buffers will be changed) + * - Allocate NewBuffer and copy all entries <= sp into new buffer + * - Remove all entries (sp including) from TargetBuffer + * - Insert NewEntry into left or right buffer (depending on sp <=> + * NewEntry) + * - Insert sp into parent buffer (or root) + * - Make sp a parent for new buffer + */ + sp = hdr_find_split(hdr1); + if (!sp) + return -EINVAL; + + sp_size = le16_to_cpu(sp->size); + up_e = ntfs_malloc(sp_size + sizeof(u64)); + if (!up_e) + return -ENOMEM; + memcpy(up_e, sp, sp_size); + + if (!hdr1->flags) { + up_e->flags |= NTFS_IE_HAS_SUBNODES; + up_e->size = cpu_to_le16(sp_size + sizeof(u64)); + sub_vbn = NULL; + } else { + t_vbn = de_get_vbn_le(up_e); + sub_vbn = &t_vbn; + } + + /* Allocate on disk a new index allocation buffer. */ + err = indx_add_allocate(indx, ni, &new_vbn); + if (err) + goto out; + + /* Allocate and format memory a new index buffer */ + n2 = indx_new(indx, ni, new_vbn, sub_vbn); + if (IS_ERR(n2)) { + err = PTR_ERR(n2); + goto out; + } + + hdr2 = &n2->index->ihdr; + + /* Make sp a parent for new buffer */ + de_set_vbn(up_e, new_vbn); + + /* copy all the entries <= sp into the new buffer. */ + de_t = hdr_first_de(hdr1); + to_copy = PtrOffset(de_t, sp); + hdr_insert_head(hdr2, de_t, to_copy); + + /* remove all entries (sp including) from hdr1 */ + used = le32_to_cpu(hdr1->used) - to_copy - sp_size; + memmove(de_t, Add2Ptr(sp, sp_size), used - le32_to_cpu(hdr1->de_off)); + hdr1->used = cpu_to_le32(used); + + /* Insert new entry into left or right buffer (depending on sp <=> new_de) */ + hdr_insert_de(indx, + (*indx->cmp)(new_de + 1, le16_to_cpu(new_de->key_size), + up_e + 1, le16_to_cpu(up_e->key_size), + ctx) < 0 + ? hdr2 + : hdr1, + new_de, NULL, ctx); + + indx_mark_used(indx, ni, new_vbn >> indx->idx2vbn_bits); + + indx_write(indx, ni, n1, 0); + indx_write(indx, ni, n2, 0); + + put_indx_node(n2); + + /* + * we've finished splitting everybody, so we are ready to + * insert the promoted entry into the parent. + */ + if (!level) { + /* Insert in root */ + err = indx_insert_into_root(indx, ni, up_e, NULL, ctx, fnd); + if (err) + goto out; + } else { + /* + * The target buffer's parent is another index buffer + * TODO: Remove recursion + */ + err = indx_insert_into_buffer(indx, ni, root, up_e, ctx, + level - 1, fnd); + if (err) + goto out; + } + +out: + ntfs_free(up_e); + + return err; +} + +/* + * indx_insert_entry + * + * inserts new entry into index + */ +int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct NTFS_DE *new_de, const void *ctx, + struct ntfs_fnd *fnd) +{ + int err; + int diff; + struct NTFS_DE *e; + struct ntfs_fnd *fnd_a = NULL; + struct INDEX_ROOT *root; + + if (!fnd) { + fnd_a = fnd_get(); + if (!fnd_a) { + err = -ENOMEM; + goto out1; + } + fnd = fnd_a; + } + + root = indx_get_root(indx, ni, NULL, NULL); + if (!root) { + err = -EINVAL; + goto out; + } + + if (fnd_is_empty(fnd)) { + /* Find the spot the tree where we want to insert the new entry. */ + err = indx_find(indx, ni, root, new_de + 1, + le16_to_cpu(new_de->key_size), ctx, &diff, &e, + fnd); + if (err) + goto out; + + if (!diff) { + err = -EEXIST; + goto out; + } + } + + if (!fnd->level) { + /* The root is also a leaf, so we'll insert the new entry into it. */ + err = indx_insert_into_root(indx, ni, new_de, fnd->root_de, ctx, + fnd); + if (err) + goto out; + } else { + /* found a leaf buffer, so we'll insert the new entry into it.*/ + err = indx_insert_into_buffer(indx, ni, root, new_de, ctx, + fnd->level - 1, fnd); + if (err) + goto out; + } + +out: + fnd_put(fnd_a); +out1: + return err; +} + +/* + * indx_find_buffer + * + * locates a buffer the tree. + */ +static struct indx_node *indx_find_buffer(struct ntfs_index *indx, + struct ntfs_inode *ni, + const struct INDEX_ROOT *root, + __le64 vbn, struct indx_node *n) +{ + int err; + const struct NTFS_DE *e; + struct indx_node *r; + const struct INDEX_HDR *hdr = n ? &n->index->ihdr : &root->ihdr; + + /* Step 1: Scan one level */ + for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) { + if (!e) + return ERR_PTR(-EINVAL); + + if (de_has_vcn(e) && vbn == de_get_vbn_le(e)) + return n; + + if (de_is_last(e)) + break; + } + + /* Step2: Do recursion */ + e = Add2Ptr(hdr, le32_to_cpu(hdr->de_off)); + for (;;) { + if (de_has_vcn_ex(e)) { + err = indx_read(indx, ni, de_get_vbn(e), &n); + if (err) + return ERR_PTR(err); + + r = indx_find_buffer(indx, ni, root, vbn, n); + if (r) + return r; + } + + if (de_is_last(e)) + break; + + e = Add2Ptr(e, le16_to_cpu(e->size)); + } + + return NULL; +} + +/* + * indx_shrink + * + * deallocates unused tail indexes + */ +static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni, + size_t bit) +{ + int err = 0; + u64 bpb, new_data; + size_t nbits; + struct ATTRIB *b; + struct ATTR_LIST_ENTRY *le = NULL; + const struct INDEX_NAMES *in = &s_index_names[indx->type]; + + b = ni_find_attr(ni, NULL, &le, ATTR_BITMAP, in->name, in->name_len, + NULL, NULL); + + if (!b) + return -ENOENT; + + if (!b->non_res) { + unsigned long pos; + const unsigned long *bm = resident_data(b); + + nbits = le32_to_cpu(b->res.data_size) * 8; + + if (bit >= nbits) + return 0; + + pos = find_next_bit(bm, nbits, bit); + if (pos < nbits) + return 0; + } else { + size_t used = MINUS_ONE_T; + + nbits = le64_to_cpu(b->nres.data_size) * 8; + + if (bit >= nbits) + return 0; + + err = scan_nres_bitmap(ni, b, indx, bit, &scan_for_used, &used); + if (err) + return err; + + if (used != MINUS_ONE_T) + return 0; + } + + new_data = (u64)bit << indx->index_bits; + + err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, + &indx->alloc_run, new_data, &new_data, false, NULL); + if (err) + return err; + + bpb = bitmap_size(bit); + if (bpb * 8 == nbits) + return 0; + + err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, + &indx->bitmap_run, bpb, &bpb, false, NULL); + + return err; +} + +static int indx_free_children(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct NTFS_DE *e, bool trim) +{ + int err; + struct indx_node *n; + struct INDEX_HDR *hdr; + CLST vbn = de_get_vbn(e); + size_t i; + + err = indx_read(indx, ni, vbn, &n); + if (err) + return err; + + hdr = &n->index->ihdr; + /* First, recurse into the children, if any.*/ + if (hdr_has_subnode(hdr)) { + for (e = hdr_first_de(hdr); e; e = hdr_next_de(hdr, e)) { + indx_free_children(indx, ni, e, false); + if (de_is_last(e)) + break; + } + } + + put_indx_node(n); + + i = vbn >> indx->idx2vbn_bits; + /* We've gotten rid of the children; add this buffer to the free list. */ + indx_mark_free(indx, ni, i); + + if (!trim) + return 0; + + /* + * If there are no used indexes after current free index + * then we can truncate allocation and bitmap + * Use bitmap to estimate the case + */ + indx_shrink(indx, ni, i + 1); + return 0; +} + +/* + * indx_get_entry_to_replace + * + * finds a replacement entry for a deleted entry + * always returns a node entry: + * NTFS_IE_HAS_SUBNODES is set the flags and the size includes the sub_vcn + */ +static int indx_get_entry_to_replace(struct ntfs_index *indx, + struct ntfs_inode *ni, + const struct NTFS_DE *de_next, + struct NTFS_DE **de_to_replace, + struct ntfs_fnd *fnd) +{ + int err; + int level = -1; + CLST vbn; + struct NTFS_DE *e, *te, *re; + struct indx_node *n; + struct INDEX_BUFFER *ib; + + *de_to_replace = NULL; + + /* Find first leaf entry down from de_next */ + vbn = de_get_vbn(de_next); + for (;;) { + n = NULL; + err = indx_read(indx, ni, vbn, &n); + if (err) + goto out; + + e = hdr_first_de(&n->index->ihdr); + fnd_push(fnd, n, e); + + if (!de_is_last(e)) { + /* + * This buffer is non-empty, so its first entry could be used as the + * replacement entry. + */ + level = fnd->level - 1; + } + + if (!de_has_vcn(e)) + break; + + /* This buffer is a node. Continue to go down */ + vbn = de_get_vbn(e); + } + + if (level == -1) + goto out; + + n = fnd->nodes[level]; + te = hdr_first_de(&n->index->ihdr); + /* Copy the candidate entry into the replacement entry buffer. */ + re = ntfs_malloc(le16_to_cpu(te->size) + sizeof(u64)); + if (!re) { + err = -ENOMEM; + goto out; + } + + *de_to_replace = re; + memcpy(re, te, le16_to_cpu(te->size)); + + if (!de_has_vcn(re)) { + /* + * The replacement entry we found doesn't have a sub_vcn. increase its size + * to hold one. + */ + le16_add_cpu(&re->size, sizeof(u64)); + re->flags |= NTFS_IE_HAS_SUBNODES; + } else { + /* + * The replacement entry we found was a node entry, which means that all + * its child buffers are empty. Return them to the free pool. + */ + indx_free_children(indx, ni, te, true); + } + + /* + * Expunge the replacement entry from its former location, + * and then write that buffer. + */ + ib = n->index; + e = hdr_delete_de(&ib->ihdr, te); + + fnd->de[level] = e; + indx_write(indx, ni, n, 0); + + /* Check to see if this action created an empty leaf. */ + if (ib_is_leaf(ib) && ib_is_empty(ib)) + return 0; + +out: + fnd_clear(fnd); + return err; +} + +/* + * indx_delete_entry + * + * deletes an entry from the index. + */ +int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, + const void *key, u32 key_len, const void *ctx) +{ + int err, diff; + struct INDEX_ROOT *root; + struct INDEX_HDR *hdr; + struct ntfs_fnd *fnd, *fnd2; + struct INDEX_BUFFER *ib; + struct NTFS_DE *e, *re, *next, *prev, *me; + struct indx_node *n, *n2d = NULL; + __le64 sub_vbn; + int level, level2; + struct ATTRIB *attr; + struct mft_inode *mi; + u32 e_size, root_size, new_root_size; + size_t trim_bit; + const struct INDEX_NAMES *in; + + fnd = fnd_get(); + if (!fnd) { + err = -ENOMEM; + goto out2; + } + + fnd2 = fnd_get(); + if (!fnd2) { + err = -ENOMEM; + goto out1; + } + + root = indx_get_root(indx, ni, &attr, &mi); + if (!root) { + err = -EINVAL; + goto out; + } + + /* Locate the entry to remove. */ + err = indx_find(indx, ni, root, key, key_len, ctx, &diff, &e, fnd); + if (err) + goto out; + + if (!e || diff) { + err = -ENOENT; + goto out; + } + + level = fnd->level; + + if (level) { + n = fnd->nodes[level - 1]; + e = fnd->de[level - 1]; + ib = n->index; + hdr = &ib->ihdr; + } else { + hdr = &root->ihdr; + e = fnd->root_de; + n = NULL; + } + + e_size = le16_to_cpu(e->size); + + if (!de_has_vcn_ex(e)) { + /* The entry to delete is a leaf, so we can just rip it out */ + hdr_delete_de(hdr, e); + + if (!level) { + hdr->total = hdr->used; + + /* Shrink resident root attribute */ + mi_resize_attr(mi, attr, 0 - e_size); + goto out; + } + + indx_write(indx, ni, n, 0); + + /* + * Check to see if removing that entry made + * the leaf empty. + */ + if (ib_is_leaf(ib) && ib_is_empty(ib)) { + fnd_pop(fnd); + fnd_push(fnd2, n, e); + } + } else { + /* + * The entry we wish to delete is a node buffer, so we + * have to find a replacement for it. + */ + next = de_get_next(e); + + err = indx_get_entry_to_replace(indx, ni, next, &re, fnd2); + if (err) + goto out; + + if (re) { + de_set_vbn_le(re, de_get_vbn_le(e)); + hdr_delete_de(hdr, e); + + err = level ? indx_insert_into_buffer(indx, ni, root, + re, ctx, + fnd->level - 1, + fnd) + : indx_insert_into_root(indx, ni, re, e, + ctx, fnd); + ntfs_free(re); + + if (err) + goto out; + } else { + /* + * There is no replacement for the current entry. + * This means that the subtree rooted at its node is empty, + * and can be deleted, which turn means that the node can + * just inherit the deleted entry sub_vcn + */ + indx_free_children(indx, ni, next, true); + + de_set_vbn_le(next, de_get_vbn_le(e)); + hdr_delete_de(hdr, e); + if (level) { + indx_write(indx, ni, n, 0); + } else { + hdr->total = hdr->used; + + /* Shrink resident root attribute */ + mi_resize_attr(mi, attr, 0 - e_size); + } + } + } + + /* Delete a branch of tree */ + if (!fnd2 || !fnd2->level) + goto out; + + /* Reinit root 'cause it can be changed */ + root = indx_get_root(indx, ni, &attr, &mi); + if (!root) { + err = -EINVAL; + goto out; + } + + n2d = NULL; + sub_vbn = fnd2->nodes[0]->index->vbn; + level2 = 0; + level = fnd->level; + + hdr = level ? &fnd->nodes[level - 1]->index->ihdr : &root->ihdr; + + /* Scan current level */ + for (e = hdr_first_de(hdr);; e = hdr_next_de(hdr, e)) { + if (!e) { + err = -EINVAL; + goto out; + } + + if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) + break; + + if (de_is_last(e)) { + e = NULL; + break; + } + } + + if (!e) { + /* Do slow search from root */ + struct indx_node *in; + + fnd_clear(fnd); + + in = indx_find_buffer(indx, ni, root, sub_vbn, NULL); + if (IS_ERR(in)) { + err = PTR_ERR(in); + goto out; + } + + if (in) + fnd_push(fnd, in, NULL); + } + + /* Merge fnd2 -> fnd */ + for (level = 0; level < fnd2->level; level++) { + fnd_push(fnd, fnd2->nodes[level], fnd2->de[level]); + fnd2->nodes[level] = NULL; + } + fnd2->level = 0; + + hdr = NULL; + for (level = fnd->level; level; level--) { + struct indx_node *in = fnd->nodes[level - 1]; + + ib = in->index; + if (ib_is_empty(ib)) { + sub_vbn = ib->vbn; + } else { + hdr = &ib->ihdr; + n2d = in; + level2 = level; + break; + } + } + + if (!hdr) + hdr = &root->ihdr; + + e = hdr_first_de(hdr); + if (!e) { + err = -EINVAL; + goto out; + } + + if (hdr != &root->ihdr || !de_is_last(e)) { + prev = NULL; + while (!de_is_last(e)) { + if (de_has_vcn(e) && sub_vbn == de_get_vbn_le(e)) + break; + prev = e; + e = hdr_next_de(hdr, e); + if (!e) { + err = -EINVAL; + goto out; + } + } + + if (sub_vbn != de_get_vbn_le(e)) { + /* + * Didn't find the parent entry, although this buffer is the parent trail. + * Something is corrupt. + */ + err = -EINVAL; + goto out; + } + + if (de_is_last(e)) { + /* + * Since we can't remove the end entry, we'll remove its + * predecessor instead. This means we have to transfer the + * predecessor's sub_vcn to the end entry. + * Note: that this index block is not empty, so the + * predecessor must exist + */ + if (!prev) { + err = -EINVAL; + goto out; + } + + if (de_has_vcn(prev)) { + de_set_vbn_le(e, de_get_vbn_le(prev)); + } else if (de_has_vcn(e)) { + le16_sub_cpu(&e->size, sizeof(u64)); + e->flags &= ~NTFS_IE_HAS_SUBNODES; + le32_sub_cpu(&hdr->used, sizeof(u64)); + } + e = prev; + } + + /* + * Copy the current entry into a temporary buffer (stripping off its + * down-pointer, if any) and delete it from the current buffer or root, + * as appropriate. + */ + e_size = le16_to_cpu(e->size); + me = ntfs_memdup(e, e_size); + if (!me) { + err = -ENOMEM; + goto out; + } + + if (de_has_vcn(me)) { + me->flags &= ~NTFS_IE_HAS_SUBNODES; + le16_sub_cpu(&me->size, sizeof(u64)); + } + + hdr_delete_de(hdr, e); + + if (hdr == &root->ihdr) { + level = 0; + hdr->total = hdr->used; + + /* Shrink resident root attribute */ + mi_resize_attr(mi, attr, 0 - e_size); + } else { + indx_write(indx, ni, n2d, 0); + level = level2; + } + + /* Mark unused buffers as free */ + trim_bit = -1; + for (; level < fnd->level; level++) { + ib = fnd->nodes[level]->index; + if (ib_is_empty(ib)) { + size_t k = le64_to_cpu(ib->vbn) >> + indx->idx2vbn_bits; + + indx_mark_free(indx, ni, k); + if (k < trim_bit) + trim_bit = k; + } + } + + fnd_clear(fnd); + /*fnd->root_de = NULL;*/ + + /* + * Re-insert the entry into the tree. + * Find the spot the tree where we want to insert the new entry. + */ + err = indx_insert_entry(indx, ni, me, ctx, fnd); + ntfs_free(me); + if (err) + goto out; + + if (trim_bit != -1) + indx_shrink(indx, ni, trim_bit); + } else { + /* + * This tree needs to be collapsed down to an empty root. + * Recreate the index root as an empty leaf and free all the bits the + * index allocation bitmap. + */ + fnd_clear(fnd); + fnd_clear(fnd2); + + in = &s_index_names[indx->type]; + + err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, + &indx->alloc_run, 0, NULL, false, NULL); + err = ni_remove_attr(ni, ATTR_ALLOC, in->name, in->name_len, + false, NULL); + run_close(&indx->alloc_run); + + err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len, + &indx->bitmap_run, 0, NULL, false, NULL); + err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len, + false, NULL); + run_close(&indx->bitmap_run); + + root = indx_get_root(indx, ni, &attr, &mi); + if (!root) { + err = -EINVAL; + goto out; + } + + root_size = le32_to_cpu(attr->res.data_size); + new_root_size = + sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE); + + if (new_root_size != root_size && + !mi_resize_attr(mi, attr, new_root_size - root_size)) { + err = -EINVAL; + goto out; + } + + /* Fill first entry */ + e = (struct NTFS_DE *)(root + 1); + e->ref.low = 0; + e->ref.high = 0; + e->ref.seq = 0; + e->size = cpu_to_le16(sizeof(struct NTFS_DE)); + e->flags = NTFS_IE_LAST; // 0x02 + e->key_size = 0; + e->res = 0; + + hdr = &root->ihdr; + hdr->flags = 0; + hdr->used = hdr->total = cpu_to_le32( + new_root_size - offsetof(struct INDEX_ROOT, ihdr)); + mi->dirty = true; + } + +out: + fnd_put(fnd2); +out1: + fnd_put(fnd); +out2: + return err; +} + +int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi, + const struct ATTR_FILE_NAME *fname, + const struct NTFS_DUP_INFO *dup, int sync) +{ + int err, diff; + struct NTFS_DE *e = NULL; + struct ATTR_FILE_NAME *e_fname; + struct ntfs_fnd *fnd; + struct INDEX_ROOT *root; + struct mft_inode *mi; + struct ntfs_index *indx = &ni->dir; + + fnd = fnd_get(); + if (!fnd) { + err = -ENOMEM; + goto out1; + } + + root = indx_get_root(indx, ni, NULL, &mi); + if (!root) { + err = -EINVAL; + goto out; + } + + /* Find entries tree and on disk */ + err = indx_find(indx, ni, root, fname, fname_full_size(fname), sbi, + &diff, &e, fnd); + if (err) + goto out; + + if (!e) { + err = -EINVAL; + goto out; + } + + if (diff) { + err = -EINVAL; + goto out; + } + + e_fname = (struct ATTR_FILE_NAME *)(e + 1); + + if (!memcmp(&e_fname->dup, dup, sizeof(*dup))) { + /* nothing to update in index! Try to avoid this call */ + goto out; + } + + memcpy(&e_fname->dup, dup, sizeof(*dup)); + + if (fnd->level) { + err = indx_write(indx, ni, fnd->nodes[fnd->level - 1], sync); + } else if (sync) { + mi->dirty = true; + err = mi_write(mi, 1); + } else { + mi->dirty = true; + mark_inode_dirty(&ni->vfs_inode); + } + +out: + fnd_put(fnd); + +out1: + return err; +} diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c new file mode 100644 index 000000000000..419d7b0a88f3 --- /dev/null +++ b/fs/ntfs3/inode.c @@ -0,0 +1,2032 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * ntfs_read_mft + * + * reads record and parses MFT + */ +static struct inode *ntfs_read_mft(struct inode *inode, + const struct cpu_str *name, + const struct MFT_REF *ref) +{ + int err = 0; + struct ntfs_inode *ni = ntfs_i(inode); + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + mode_t mode = 0; + struct ATTR_STD_INFO5 *std5 = NULL; + struct ATTR_LIST_ENTRY *le; + struct ATTRIB *attr; + bool is_match = false; + bool is_root = false; + bool is_dir; + unsigned long ino = inode->i_ino; + u32 rp_fa = 0, asize, t32; + u16 roff, rsize, names = 0; + const struct ATTR_FILE_NAME *fname = NULL; + const struct INDEX_ROOT *root; + struct REPARSE_DATA_BUFFER rp; // 0x18 bytes + u64 t64; + struct MFT_REC *rec; + struct runs_tree *run; + + inode->i_op = NULL; + + err = mi_init(&ni->mi, sbi, ino); + if (err) + goto out; + + if (!sbi->mft.ni && ino == MFT_REC_MFT && !sb->s_root) { + t64 = sbi->mft.lbo >> sbi->cluster_bits; + t32 = bytes_to_cluster(sbi, MFT_REC_VOL * sbi->record_size); + sbi->mft.ni = ni; + init_rwsem(&ni->file.run_lock); + + if (!run_add_entry(&ni->file.run, 0, t64, t32, true)) { + err = -ENOMEM; + goto out; + } + } + + err = mi_read(&ni->mi, ino == MFT_REC_MFT); + + if (err) + goto out; + + rec = ni->mi.mrec; + + if (sbi->flags & NTFS_FLAGS_LOG_REPLAYING) { + ; + } else if (ref->seq != rec->seq) { + err = -EINVAL; + ntfs_err(sb, "MFT: r=%lx, expect seq=%x instead of %x!", ino, + le16_to_cpu(ref->seq), le16_to_cpu(rec->seq)); + goto out; + } else if (!is_rec_inuse(rec)) { + err = -EINVAL; + ntfs_err(sb, "Inode r=%x is not in use!", (u32)ino); + goto out; + } + + if (le32_to_cpu(rec->total) != sbi->record_size) { + // bad inode? + err = -EINVAL; + goto out; + } + + if (!is_rec_base(rec)) + goto Ok; + + /* record should contain $I30 root */ + is_dir = rec->flags & RECORD_FLAG_DIR; + + inode->i_generation = le16_to_cpu(rec->seq); + + /* Enumerate all struct Attributes MFT */ + le = NULL; + attr = NULL; + + /* + * to reduce tab pressure use goto instead of + * while( (attr = ni_enum_attr_ex(ni, attr, &le, NULL) )) + */ +next_attr: + run = NULL; + err = -EINVAL; + attr = ni_enum_attr_ex(ni, attr, &le, NULL); + if (!attr) + goto end_enum; + + if (le && le->vcn) { + /* This is non primary attribute segment. Ignore if not MFT */ + if (ino != MFT_REC_MFT || attr->type != ATTR_DATA) + goto next_attr; + + run = &ni->file.run; + asize = le32_to_cpu(attr->size); + goto attr_unpack_run; + } + + roff = attr->non_res ? 0 : le16_to_cpu(attr->res.data_off); + rsize = attr->non_res ? 0 : le32_to_cpu(attr->res.data_size); + asize = le32_to_cpu(attr->size); + + switch (attr->type) { + case ATTR_STD: + if (attr->non_res || + asize < sizeof(struct ATTR_STD_INFO) + roff || + rsize < sizeof(struct ATTR_STD_INFO)) + goto out; + + if (std5) + goto next_attr; + + std5 = Add2Ptr(attr, roff); + +#ifdef STATX_BTIME + nt2kernel(std5->cr_time, &ni->i_crtime); +#endif + nt2kernel(std5->a_time, &inode->i_atime); + nt2kernel(std5->c_time, &inode->i_ctime); + nt2kernel(std5->m_time, &inode->i_mtime); + + ni->std_fa = std5->fa; + + if (asize >= sizeof(struct ATTR_STD_INFO5) + roff && + rsize >= sizeof(struct ATTR_STD_INFO5)) + ni->std_security_id = std5->security_id; + goto next_attr; + + case ATTR_LIST: + if (attr->name_len || le || ino == MFT_REC_LOG) + goto out; + + err = ntfs_load_attr_list(ni, attr); + if (err) + goto out; + + le = NULL; + attr = NULL; + goto next_attr; + + case ATTR_NAME: + if (attr->non_res || asize < SIZEOF_ATTRIBUTE_FILENAME + roff || + rsize < SIZEOF_ATTRIBUTE_FILENAME) + goto out; + + fname = Add2Ptr(attr, roff); + if (fname->type == FILE_NAME_DOS) + goto next_attr; + + names += 1; + if (name && name->len == fname->name_len && + !ntfs_cmp_names_cpu(name, (struct le_str *)&fname->name_len, + NULL, false)) + is_match = true; + + goto next_attr; + + case ATTR_DATA: + if (is_dir) { + /* ignore data attribute in dir record */ + goto next_attr; + } + + if (ino == MFT_REC_BADCLUST && !attr->non_res) + goto next_attr; + + if (attr->name_len && + ((ino != MFT_REC_BADCLUST || !attr->non_res || + attr->name_len != ARRAY_SIZE(BAD_NAME) || + memcmp(attr_name(attr), BAD_NAME, sizeof(BAD_NAME))) && + (ino != MFT_REC_SECURE || !attr->non_res || + attr->name_len != ARRAY_SIZE(SDS_NAME) || + memcmp(attr_name(attr), SDS_NAME, sizeof(SDS_NAME))))) { + /* file contains stream attribute. ignore it */ + goto next_attr; + } + + if (is_attr_sparsed(attr)) + ni->std_fa |= FILE_ATTRIBUTE_SPARSE_FILE; + else + ni->std_fa &= ~FILE_ATTRIBUTE_SPARSE_FILE; + + if (is_attr_compressed(attr)) + ni->std_fa |= FILE_ATTRIBUTE_COMPRESSED; + else + ni->std_fa &= ~FILE_ATTRIBUTE_COMPRESSED; + + if (is_attr_encrypted(attr)) + ni->std_fa |= FILE_ATTRIBUTE_ENCRYPTED; + else + ni->std_fa &= ~FILE_ATTRIBUTE_ENCRYPTED; + + if (!attr->non_res) { + ni->i_valid = inode->i_size = rsize; + inode_set_bytes(inode, rsize); + t32 = asize; + } else { + t32 = le16_to_cpu(attr->nres.run_off); + } + + mode = S_IFREG | (0777 & sbi->options.fs_fmask_inv); + + if (!attr->non_res) { + ni->ni_flags |= NI_FLAG_RESIDENT; + goto next_attr; + } + + inode_set_bytes(inode, attr_ondisk_size(attr)); + + ni->i_valid = le64_to_cpu(attr->nres.valid_size); + inode->i_size = le64_to_cpu(attr->nres.data_size); + if (!attr->nres.alloc_size) + goto next_attr; + + run = ino == MFT_REC_BITMAP ? &sbi->used.bitmap.run + : &ni->file.run; + break; + + case ATTR_ROOT: + if (attr->non_res) + goto out; + + root = Add2Ptr(attr, roff); + is_root = true; + + if (attr->name_len != ARRAY_SIZE(I30_NAME) || + memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) + goto next_attr; + + if (root->type != ATTR_NAME || + root->rule != NTFS_COLLATION_TYPE_FILENAME) + goto out; + + if (!is_dir) + goto next_attr; + + ni->ni_flags |= NI_FLAG_DIR; + + err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30); + if (err) + goto out; + + mode = sb->s_root + ? (S_IFDIR | (0777 & sbi->options.fs_dmask_inv)) + : (S_IFDIR | 0777); + goto next_attr; + + case ATTR_ALLOC: + if (!is_root || attr->name_len != ARRAY_SIZE(I30_NAME) || + memcmp(attr_name(attr), I30_NAME, sizeof(I30_NAME))) + goto next_attr; + + inode->i_size = le64_to_cpu(attr->nres.data_size); + ni->i_valid = le64_to_cpu(attr->nres.valid_size); + inode_set_bytes(inode, le64_to_cpu(attr->nres.alloc_size)); + + run = &ni->dir.alloc_run; + break; + + case ATTR_BITMAP: + if (ino == MFT_REC_MFT) { + if (!attr->non_res) + goto out; +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + /* 0x20000000 = 2^32 / 8 */ + if (le64_to_cpu(attr->nres.alloc_size) >= 0x20000000) + goto out; +#endif + run = &sbi->mft.bitmap.run; + break; + } else if (is_dir && attr->name_len == ARRAY_SIZE(I30_NAME) && + !memcmp(attr_name(attr), I30_NAME, + sizeof(I30_NAME)) && + attr->non_res) { + run = &ni->dir.bitmap_run; + break; + } + goto next_attr; + + case ATTR_REPARSE: + if (attr->name_len) + goto next_attr; + + rp_fa = ni_parse_reparse(ni, attr, &rp); + switch (rp_fa) { + case REPARSE_LINK: + if (!attr->non_res) { + inode->i_size = rsize; + inode_set_bytes(inode, rsize); + t32 = asize; + } else { + inode->i_size = + le64_to_cpu(attr->nres.data_size); + t32 = le16_to_cpu(attr->nres.run_off); + } + + /* Looks like normal symlink */ + ni->i_valid = inode->i_size; + + /* Clear directory bit */ + if (ni->ni_flags & NI_FLAG_DIR) { + indx_clear(&ni->dir); + memset(&ni->dir, 0, sizeof(ni->dir)); + ni->ni_flags &= ~NI_FLAG_DIR; + } else { + run_close(&ni->file.run); + } + mode = S_IFLNK | 0777; + is_dir = false; + if (attr->non_res) { + run = &ni->file.run; + goto attr_unpack_run; // double break + } + break; + + case REPARSE_COMPRESSED: + break; + + case REPARSE_DEDUPLICATED: + break; + } + goto next_attr; + + case ATTR_EA_INFO: + if (!attr->name_len && + resident_data_ex(attr, sizeof(struct EA_INFO))) + ni->ni_flags |= NI_FLAG_EA; + goto next_attr; + + default: + goto next_attr; + } + +attr_unpack_run: + roff = le16_to_cpu(attr->nres.run_off); + + t64 = le64_to_cpu(attr->nres.svcn); + err = run_unpack_ex(run, sbi, ino, t64, le64_to_cpu(attr->nres.evcn), + t64, Add2Ptr(attr, roff), asize - roff); + if (err < 0) + goto out; + err = 0; + goto next_attr; + +end_enum: + + if (!std5) + goto out; + + if (!is_match && name) { + /* reuse rec as buffer for ascii name */ + err = -ENOENT; + goto out; + } + + if (std5->fa & FILE_ATTRIBUTE_READONLY) + mode &= ~0222; + + /* Setup 'uid' and 'gid' */ + inode->i_uid = sbi->options.fs_uid; + inode->i_gid = sbi->options.fs_gid; + + if (!names) { + err = -EINVAL; + goto out; + } + + if (S_ISDIR(mode)) { + ni->std_fa |= FILE_ATTRIBUTE_DIRECTORY; + + /* + * dot and dot-dot should be included in count but was not + * included in enumeration. + * Usually a hard links to directories are disabled + */ + set_nlink(inode, 1); + inode->i_op = &ntfs_dir_inode_operations; + inode->i_fop = &ntfs_dir_operations; + ni->i_valid = 0; + } else if (S_ISLNK(mode)) { + ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; + inode->i_op = &ntfs_link_inode_operations; + inode->i_fop = NULL; + inode_nohighmem(inode); // ?? + set_nlink(inode, names); + } else if (S_ISREG(mode)) { + ni->std_fa &= ~FILE_ATTRIBUTE_DIRECTORY; + + set_nlink(inode, names); + + inode->i_op = &ntfs_file_inode_operations; + inode->i_fop = &ntfs_file_operations; + inode->i_mapping->a_ops = + is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; + + if (ino != MFT_REC_MFT) + init_rwsem(&ni->file.run_lock); + } else if (fname && fname->home.low == cpu_to_le32(MFT_REC_EXTEND) && + fname->home.seq == cpu_to_le16(MFT_REC_EXTEND)) { + /* Records in $Extend are not a files or general directories */ + } else { + err = -EINVAL; + goto out; + } + + if ((sbi->options.sys_immutable && + (std5->fa & FILE_ATTRIBUTE_SYSTEM)) && + !S_ISFIFO(mode) && !S_ISSOCK(mode) && !S_ISLNK(mode)) { + inode->i_flags |= S_IMMUTABLE; + } else { + inode->i_flags &= ~S_IMMUTABLE; + } + + inode->i_mode = mode; + if (!(ni->ni_flags & NI_FLAG_EA)) { + /* if no xattr then no security (stored in xattr) */ + inode->i_flags |= S_NOSEC; + } + +Ok: + if (ino == MFT_REC_MFT && !sb->s_root) + sbi->mft.ni = NULL; + + unlock_new_inode(inode); + + return inode; + +out: + if (ino == MFT_REC_MFT && !sb->s_root) + sbi->mft.ni = NULL; + + iget_failed(inode); + return ERR_PTR(err); +} + +/* returns 1 if match */ +static int ntfs_test_inode(struct inode *inode, void *data) +{ + struct MFT_REF *ref = data; + + return ino_get(ref) == inode->i_ino; +} + +static int ntfs_set_inode(struct inode *inode, void *data) +{ + const struct MFT_REF *ref = data; + + inode->i_ino = ino_get(ref); + return 0; +} + +struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, + const struct cpu_str *name) +{ + struct inode *inode; + + inode = iget5_locked(sb, ino_get(ref), ntfs_test_inode, ntfs_set_inode, + (void *)ref); + if (unlikely(!inode)) + return ERR_PTR(-ENOMEM); + + /* If this is a freshly allocated inode, need to read it now. */ + if (inode->i_state & I_NEW) + inode = ntfs_read_mft(inode, name, ref); + else if (ref->seq != ntfs_i(inode)->mi.mrec->seq) { + /* inode overlaps? */ + make_bad_inode(inode); + } + + return inode; +} + +enum get_block_ctx { + GET_BLOCK_GENERAL = 0, + GET_BLOCK_WRITE_BEGIN = 1, + GET_BLOCK_DIRECT_IO_R = 2, + GET_BLOCK_DIRECT_IO_W = 3, + GET_BLOCK_BMAP = 4, +}; + +static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, + struct buffer_head *bh, int create, + enum get_block_ctx ctx) +{ + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(inode); + struct page *page = bh->b_page; + u8 cluster_bits = sbi->cluster_bits; + u32 block_size = sb->s_blocksize; + u64 bytes, lbo, valid; + u32 off; + int err; + CLST vcn, lcn, len; + bool new; + + /*clear previous state*/ + clear_buffer_new(bh); + clear_buffer_uptodate(bh); + + /* direct write uses 'create=0'*/ + if (!create && vbo >= ni->i_valid) { + /* out of valid */ + return 0; + } + + if (vbo >= inode->i_size) { + /* out of size */ + return 0; + } + + if (is_resident(ni)) { + ni_lock(ni); + err = attr_data_read_resident(ni, page); + ni_unlock(ni); + + if (!err) + set_buffer_uptodate(bh); + bh->b_size = block_size; + return err; + } + + vcn = vbo >> cluster_bits; + off = vbo & sbi->cluster_mask; + new = false; + + err = attr_data_get_block(ni, vcn, 1, &lcn, &len, create ? &new : NULL); + if (err) + goto out; + + if (!len) + return 0; + + bytes = ((u64)len << cluster_bits) - off; + + if (lcn == SPARSE_LCN) { + if (!create) { + if (bh->b_size > bytes) + bh->b_size = bytes; + + return 0; + } + WARN_ON(1); + } + + if (new) { + set_buffer_new(bh); + if ((len << cluster_bits) > block_size) + ntfs_sparse_cluster(inode, page, vcn, len); + } + + lbo = ((u64)lcn << cluster_bits) + off; + + set_buffer_mapped(bh); + bh->b_bdev = sb->s_bdev; + bh->b_blocknr = lbo >> sb->s_blocksize_bits; + + valid = ni->i_valid; + + if (ctx == GET_BLOCK_DIRECT_IO_W) { + /*ntfs_direct_IO will update ni->i_valid */ + if (vbo >= valid) + set_buffer_new(bh); + } else if (create) { + /*normal write*/ + if (vbo >= valid) { + set_buffer_new(bh); + if (bytes > bh->b_size) + bytes = bh->b_size; + ni->i_valid = vbo + bytes; + mark_inode_dirty(inode); + } + } else if (valid >= inode->i_size) { + /* normal read of normal file*/ + } else if (vbo >= valid) { + /* read out of valid data*/ + /* should never be here 'cause already checked */ + clear_buffer_mapped(bh); + } else if (vbo + bytes <= valid) { + /* normal read */ + } else if (vbo + block_size <= valid) { + /* normal short read */ + bytes = block_size; + } else { + /* + * read across valid size: vbo < valid && valid < vbo + block_size + */ + u32 voff = valid - vbo; + + bh->b_size = bytes = block_size; + off = vbo & (PAGE_SIZE - 1); + set_bh_page(bh, page, off); + ll_rw_block(REQ_OP_READ, 0, 1, &bh); + wait_on_buffer(bh); + /* Uhhuh. Read error. Complain and punt. */ + if (!buffer_uptodate(bh)) { + err = -EIO; + goto out; + } + zero_user_segment(page, off + voff, off + block_size); + } + + if (bh->b_size > bytes) + bh->b_size = bytes; + +#ifndef __LP64__ + if (ctx == GET_BLOCK_DIRECT_IO_W || ctx == GET_BLOCK_DIRECT_IO_R) { + static_assert(sizeof(size_t) < sizeof(loff_t)); + if (bytes > 0x40000000u) + bh->b_size = 0x40000000u; + } +#endif + + return 0; + +out: + return err; +} + +int ntfs_get_block(struct inode *inode, sector_t vbn, + struct buffer_head *bh_result, int create) +{ + return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, + bh_result, create, GET_BLOCK_GENERAL); +} + +static int ntfs_get_block_bmap(struct inode *inode, sector_t vsn, + struct buffer_head *bh_result, int create) +{ + return ntfs_get_block_vbo(inode, + (u64)vsn << inode->i_sb->s_blocksize_bits, + bh_result, create, GET_BLOCK_BMAP); +} + +static sector_t ntfs_bmap(struct address_space *mapping, sector_t block) +{ + return generic_block_bmap(mapping, block, ntfs_get_block_bmap); +} + +static int ntfs_readpage(struct file *file, struct page *page) +{ + int err; + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + + if (is_resident(ni)) { + ni_lock(ni); + err = attr_data_read_resident(ni, page); + ni_unlock(ni); + if (err != E_NTFS_NONRESIDENT) { + unlock_page(page); + return err; + } + } + + if (is_compressed(ni)) { + ni_lock(ni); + err = ni_readpage_cmpr(ni, page); + ni_unlock(ni); + return err; + } + + /* normal + sparse files */ + return mpage_readpage(page, ntfs_get_block); +} + +static void ntfs_readahead(struct readahead_control *rac) +{ + struct address_space *mapping = rac->mapping; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + u64 valid; + loff_t pos; + + if (is_resident(ni)) { + /* no readahead for resident */ + return; + } + + if (is_compressed(ni)) { + /* no readahead for compressed */ + return; + } + + valid = ni->i_valid; + pos = readahead_pos(rac); + + if (valid < i_size_read(inode) && pos <= valid && + valid < pos + readahead_length(rac)) { + /* range cross 'valid'. read it page by page */ + return; + } + + mpage_readahead(rac, ntfs_get_block); +} + +static int ntfs_get_block_direct_IO_R(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, + bh_result, create, GET_BLOCK_DIRECT_IO_R); +} + +static int ntfs_get_block_direct_IO_W(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + return ntfs_get_block_vbo(inode, (u64)iblock << inode->i_blkbits, + bh_result, create, GET_BLOCK_DIRECT_IO_W); +} + +static ssize_t ntfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) +{ + struct file *file = iocb->ki_filp; + struct address_space *mapping = file->f_mapping; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + size_t count = iov_iter_count(iter); + loff_t vbo = iocb->ki_pos; + loff_t end = vbo + count; + int wr = iov_iter_rw(iter) & WRITE; + const struct iovec *iov = iter->iov; + unsigned long nr_segs = iter->nr_segs; + loff_t valid; + ssize_t ret; + + if (is_resident(ni)) { + /*switch to buffered write*/ + ret = 0; + goto out; + } + + ret = blockdev_direct_IO(iocb, inode, iter, + wr ? ntfs_get_block_direct_IO_W + : ntfs_get_block_direct_IO_R); + valid = ni->i_valid; + if (wr) { + if (ret <= 0) + goto out; + + vbo += ret; + if (vbo > valid && !S_ISBLK(inode->i_mode)) { + ni->i_valid = vbo; + mark_inode_dirty(inode); + } + } else if (vbo < valid && valid < end) { + /* fix page */ + unsigned long uaddr = ~0ul; + struct page *page; + long i, npages; + size_t dvbo = valid - vbo; + size_t off = 0; + + /*Find user address*/ + for (i = 0; i < nr_segs; i++) { + if (off <= dvbo && dvbo < off + iov[i].iov_len) { + uaddr = (unsigned long)iov[i].iov_base + dvbo - + off; + break; + } + off += iov[i].iov_len; + } + + if (uaddr == ~0ul) + goto fix_error; + + npages = get_user_pages_unlocked(uaddr, 1, &page, FOLL_WRITE); + + if (npages <= 0) + goto fix_error; + + zero_user_segment(page, valid & (PAGE_SIZE - 1), PAGE_SIZE); + put_page(page); + } + +out: + return ret; +fix_error: + ntfs_inode_warn(inode, "file garbage at 0x%llx", valid); + goto out; +} + +int ntfs_set_size(struct inode *inode, u64 new_size) +{ + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni = ntfs_i(inode); + int err; + + /* Check for maximum file size */ + if (is_sparsed(ni) || is_compressed(ni)) { + if (new_size > sbi->maxbytes_sparse) { + err = -EFBIG; + goto out; + } + } else if (new_size > sbi->maxbytes) { + err = -EFBIG; + goto out; + } + + ni_lock(ni); + down_write(&ni->file.run_lock); + + err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size, + &ni->i_valid, true, NULL); + + up_write(&ni->file.run_lock); + ni_unlock(ni); + + mark_inode_dirty(inode); + +out: + return err; +} + +static int ntfs_writepage(struct page *page, struct writeback_control *wbc) +{ + struct address_space *mapping = page->mapping; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + int err; + + if (is_resident(ni)) { + ni_lock(ni); + err = attr_data_write_resident(ni, page); + ni_unlock(ni); + if (err != E_NTFS_NONRESIDENT) { + unlock_page(page); + return err; + } + } + + return block_write_full_page(page, ntfs_get_block, wbc); +} + +static int ntfs_writepages(struct address_space *mapping, + struct writeback_control *wbc) +{ + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + /* redirect call to 'ntfs_writepage' for resident files*/ + get_block_t *get_block = is_resident(ni) ? NULL : &ntfs_get_block; + + return mpage_writepages(mapping, wbc, get_block); +} + +static int ntfs_get_block_write_begin(struct inode *inode, sector_t vbn, + struct buffer_head *bh_result, int create) +{ + return ntfs_get_block_vbo(inode, (u64)vbn << inode->i_blkbits, + bh_result, create, GET_BLOCK_WRITE_BEGIN); +} + +static int ntfs_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, u32 len, u32 flags, struct page **pagep, + void **fsdata) +{ + int err; + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + + *pagep = NULL; + if (is_resident(ni)) { + struct page *page = grab_cache_page_write_begin( + mapping, pos >> PAGE_SHIFT, flags); + + if (!page) { + err = -ENOMEM; + goto out; + } + + ni_lock(ni); + err = attr_data_read_resident(ni, page); + ni_unlock(ni); + + if (!err) { + *pagep = page; + goto out; + } + unlock_page(page); + put_page(page); + + if (err != E_NTFS_NONRESIDENT) + goto out; + } + + err = block_write_begin(mapping, pos, len, flags, pagep, + ntfs_get_block_write_begin); + +out: + return err; +} + +/* address_space_operations::write_end */ +static int ntfs_write_end(struct file *file, struct address_space *mapping, + loff_t pos, u32 len, u32 copied, struct page *page, + void *fsdata) + +{ + struct inode *inode = mapping->host; + struct ntfs_inode *ni = ntfs_i(inode); + u64 valid = ni->i_valid; + bool dirty = false; + int err; + + if (is_resident(ni)) { + ni_lock(ni); + err = attr_data_write_resident(ni, page); + ni_unlock(ni); + if (!err) { + dirty = true; + /* clear any buffers in page*/ + if (page_has_buffers(page)) { + struct buffer_head *head, *bh; + + bh = head = page_buffers(page); + do { + clear_buffer_dirty(bh); + clear_buffer_mapped(bh); + set_buffer_uptodate(bh); + } while (head != (bh = bh->b_this_page)); + } + SetPageUptodate(page); + err = copied; + } + unlock_page(page); + put_page(page); + } else { + err = generic_write_end(file, mapping, pos, len, copied, page, + fsdata); + } + + if (err >= 0) { + if (!(ni->std_fa & FILE_ATTRIBUTE_ARCHIVE)) { + inode->i_ctime = inode->i_mtime = current_time(inode); + ni->std_fa |= FILE_ATTRIBUTE_ARCHIVE; + dirty = true; + } + + if (valid != ni->i_valid) { + /* ni->i_valid is changed in ntfs_get_block_vbo */ + dirty = true; + } + + if (dirty) + mark_inode_dirty(inode); + } + + return err; +} + +int reset_log_file(struct inode *inode) +{ + int err; + loff_t pos = 0; + u32 log_size = inode->i_size; + struct address_space *mapping = inode->i_mapping; + + for (;;) { + u32 len; + void *kaddr; + struct page *page; + + len = pos + PAGE_SIZE > log_size ? (log_size - pos) : PAGE_SIZE; + + err = block_write_begin(mapping, pos, len, 0, &page, + ntfs_get_block_write_begin); + if (err) + goto out; + + kaddr = kmap_atomic(page); + memset(kaddr, -1, len); + kunmap_atomic(kaddr); + flush_dcache_page(page); + + err = block_write_end(NULL, mapping, pos, len, len, page, NULL); + if (err < 0) + goto out; + pos += len; + + if (pos >= log_size) + break; + balance_dirty_pages_ratelimited(mapping); + } +out: + mark_inode_dirty_sync(inode); + + return err; +} + +int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc) +{ + return _ni_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); +} + +int ntfs_sync_inode(struct inode *inode) +{ + return _ni_write_inode(inode, 1); +} + +/* + * helper function for ntfs_flush_inodes. This writes both the inode + * and the file data blocks, waiting for in flight data blocks before + * the start of the call. It does not wait for any io started + * during the call + */ +static int writeback_inode(struct inode *inode) +{ + int ret = sync_inode_metadata(inode, 0); + + if (!ret) + ret = filemap_fdatawrite(inode->i_mapping); + return ret; +} + +/* + * write data and metadata corresponding to i1 and i2. The io is + * started but we do not wait for any of it to finish. + * + * filemap_flush is used for the block device, so if there is a dirty + * page for a block already in flight, we will not wait and start the + * io over again + */ +int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2) +{ + int ret = 0; + + if (i1) + ret = writeback_inode(i1); + if (!ret && i2) + ret = writeback_inode(i2); + if (!ret) + ret = filemap_flush(sb->s_bdev->bd_inode->i_mapping); + return ret; +} + +int inode_write_data(struct inode *inode, const void *data, size_t bytes) +{ + pgoff_t idx; + + /* Write non resident data */ + for (idx = 0; bytes; idx++) { + size_t op = bytes > PAGE_SIZE ? PAGE_SIZE : bytes; + struct page *page = ntfs_map_page(inode->i_mapping, idx); + + if (IS_ERR(page)) + return PTR_ERR(page); + + lock_page(page); + WARN_ON(!PageUptodate(page)); + ClearPageUptodate(page); + + memcpy(page_address(page), data, op); + + flush_dcache_page(page); + SetPageUptodate(page); + unlock_page(page); + + ntfs_unmap_page(page); + + bytes -= op; + data = Add2Ptr(data, PAGE_SIZE); + } + return 0; +} + +/* + * number of bytes to for REPARSE_DATA_BUFFER(IO_REPARSE_TAG_SYMLINK) + * for unicode string of 'uni_len' length + */ +static inline u32 ntfs_reparse_bytes(u32 uni_len) +{ + /* header + unicode string + decorated unicode string */ + return sizeof(short) * (2 * uni_len + 4) + + offsetof(struct REPARSE_DATA_BUFFER, + SymbolicLinkReparseBuffer.PathBuffer); +} + +static struct REPARSE_DATA_BUFFER * +ntfs_create_reparse_buffer(struct ntfs_sb_info *sbi, const char *symname, + u32 size, u16 *nsize) +{ + int i, err; + struct REPARSE_DATA_BUFFER *rp; + __le16 *rp_name; + typeof(rp->SymbolicLinkReparseBuffer) *rs; + + rp = ntfs_zalloc(ntfs_reparse_bytes(2 * size + 2)); + if (!rp) + return ERR_PTR(-ENOMEM); + + rs = &rp->SymbolicLinkReparseBuffer; + rp_name = rs->PathBuffer; + + /* Convert link name to utf16 */ + err = ntfs_nls_to_utf16(sbi, symname, size, + (struct cpu_str *)(rp_name - 1), 2 * size, + UTF16_LITTLE_ENDIAN); + if (err < 0) + goto out; + + /* err = the length of unicode name of symlink */ + *nsize = ntfs_reparse_bytes(err); + + if (*nsize > sbi->reparse.max_size) { + err = -EFBIG; + goto out; + } + + /* translate linux '/' into windows '\' */ + for (i = 0; i < err; i++) { + if (rp_name[i] == cpu_to_le16('/')) + rp_name[i] = cpu_to_le16('\\'); + } + + rp->ReparseTag = IO_REPARSE_TAG_SYMLINK; + rp->ReparseDataLength = + cpu_to_le16(*nsize - offsetof(struct REPARSE_DATA_BUFFER, + SymbolicLinkReparseBuffer)); + + /* PrintName + SubstituteName */ + rs->SubstituteNameOffset = cpu_to_le16(sizeof(short) * err); + rs->SubstituteNameLength = cpu_to_le16(sizeof(short) * err + 8); + rs->PrintNameLength = rs->SubstituteNameOffset; + + /* + * TODO: use relative path if possible to allow windows to parse this path + * 0-absolute path 1- relative path (SYMLINK_FLAG_RELATIVE) + */ + rs->Flags = 0; + + memmove(rp_name + err + 4, rp_name, sizeof(short) * err); + + /* decorate SubstituteName */ + rp_name += err; + rp_name[0] = cpu_to_le16('\\'); + rp_name[1] = cpu_to_le16('?'); + rp_name[2] = cpu_to_le16('?'); + rp_name[3] = cpu_to_le16('\\'); + + return rp; +out: + ntfs_free(rp); + return ERR_PTR(err); +} + +struct inode *ntfs_create_inode(struct inode *dir, struct dentry *dentry, + const struct cpu_str *uni, umode_t mode, + dev_t dev, const char *symname, u32 size, + int excl, struct ntfs_fnd *fnd) +{ + int err; + struct super_block *sb = dir->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + const struct qstr *name = &dentry->d_name; + CLST ino = 0; + struct ntfs_inode *dir_ni = ntfs_i(dir); + struct ntfs_inode *ni = NULL; + struct inode *inode = NULL; + struct ATTRIB *attr; + struct ATTR_STD_INFO5 *std5; + struct ATTR_FILE_NAME *fname; + struct MFT_REC *rec; + u32 asize, dsize, sd_size; + enum FILE_ATTRIBUTE fa; + __le32 security_id = SECURITY_ID_INVALID; + CLST vcn; + const void *sd; + u16 t16, nsize = 0, aid = 0; + struct INDEX_ROOT *root, *dir_root; + struct NTFS_DE *e, *new_de = NULL; + struct REPARSE_DATA_BUFFER *rp = NULL; + bool is_dir = S_ISDIR(mode); + bool is_link = S_ISLNK(mode); + bool rp_inserted = false; + bool is_sp = S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode); + + if (is_sp) + return ERR_PTR(-EOPNOTSUPP); + + dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL); + if (!dir_root) + return ERR_PTR(-EINVAL); + + if (is_dir) { + /* use parent's directory attributes */ + fa = dir_ni->std_fa | FILE_ATTRIBUTE_DIRECTORY | + FILE_ATTRIBUTE_ARCHIVE; + /* + * By default child directory inherits parent attributes + * root directory is hidden + system + * Make an exception for children in root + */ + if (dir->i_ino == MFT_REC_ROOT) + fa &= ~(FILE_ATTRIBUTE_HIDDEN | FILE_ATTRIBUTE_SYSTEM); + } else if (is_link) { + /* It is good idea that link should be the same type (file/dir) as target */ + fa = FILE_ATTRIBUTE_REPARSE_POINT; + + /* + * linux: there are dir/file/symlink and so on + * NTFS: symlinks are "dir + reparse" or "file + reparse" + * It is good idea to create: + * dir + reparse if 'symname' points to directory + * or + * file + reparse if 'symname' points to file + * Unfortunately kern_path hangs if symname contains 'dir' + */ + + /* + * struct path path; + * + * if (!kern_path(symname, LOOKUP_FOLLOW, &path)){ + * struct inode *target = d_inode(path.dentry); + * + * if (S_ISDIR(target->i_mode)) + * fa |= FILE_ATTRIBUTE_DIRECTORY; + * // if ( target->i_sb == sb ){ + * // use relative path? + * // } + * path_put(&path); + * } + */ + } else if (sbi->options.sparse) { + /* sparsed regular file, cause option 'sparse' */ + fa = FILE_ATTRIBUTE_SPARSE_FILE | FILE_ATTRIBUTE_ARCHIVE; + } else if (dir_ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) { + /* compressed regular file, if parent is compressed */ + fa = FILE_ATTRIBUTE_COMPRESSED | FILE_ATTRIBUTE_ARCHIVE; + } else { + /* regular file, default attributes */ + fa = FILE_ATTRIBUTE_ARCHIVE; + } + + if (!(mode & 0222)) + fa |= FILE_ATTRIBUTE_READONLY; + + /* allocate PATH_MAX bytes */ + new_de = __getname(); + if (!new_de) { + err = -ENOMEM; + goto out1; + } + + /*mark rw ntfs as dirty. it will be cleared at umount*/ + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + + /* Step 1: allocate and fill new mft record */ + err = ntfs_look_free_mft(sbi, &ino, false, NULL, NULL); + if (err) + goto out2; + + ni = ntfs_new_inode(sbi, ino, fa & FILE_ATTRIBUTE_DIRECTORY); + if (IS_ERR(ni)) { + err = PTR_ERR(ni); + ni = NULL; + goto out3; + } + inode = &ni->vfs_inode; + + inode->i_atime = inode->i_mtime = inode->i_ctime = ni->i_crtime = + current_time(inode); + + rec = ni->mi.mrec; + rec->hard_links = cpu_to_le16(1); + attr = Add2Ptr(rec, le16_to_cpu(rec->attr_off)); + + /* Get default security id */ + sd = s_default_security; + sd_size = sizeof(s_default_security); + + if (is_ntfs3(sbi)) { + security_id = dir_ni->std_security_id; + if (le32_to_cpu(security_id) < SECURITY_ID_FIRST) { + security_id = sbi->security.def_security_id; + + if (security_id == SECURITY_ID_INVALID && + !ntfs_insert_security(sbi, sd, sd_size, + &security_id, NULL)) + sbi->security.def_security_id = security_id; + } + } + + /* Insert standard info */ + std5 = Add2Ptr(attr, SIZEOF_RESIDENT); + + if (security_id == SECURITY_ID_INVALID) { + dsize = sizeof(struct ATTR_STD_INFO); + } else { + dsize = sizeof(struct ATTR_STD_INFO5); + std5->security_id = security_id; + ni->std_security_id = security_id; + } + asize = SIZEOF_RESIDENT + dsize; + + attr->type = ATTR_STD; + attr->size = cpu_to_le32(asize); + attr->id = cpu_to_le16(aid++); + attr->res.data_off = SIZEOF_RESIDENT_LE; + attr->res.data_size = cpu_to_le32(dsize); + + std5->cr_time = std5->m_time = std5->c_time = std5->a_time = + kernel2nt(&inode->i_atime); + + ni->std_fa = fa; + std5->fa = fa; + + attr = Add2Ptr(attr, asize); + + /* Insert file name */ + err = fill_name_de(sbi, new_de, name, uni); + if (err) + goto out4; + + mi_get_ref(&ni->mi, &new_de->ref); + + fname = (struct ATTR_FILE_NAME *)(new_de + 1); + mi_get_ref(&dir_ni->mi, &fname->home); + fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time = + fname->dup.a_time = std5->cr_time; + fname->dup.alloc_size = fname->dup.data_size = 0; + fname->dup.fa = std5->fa; + fname->dup.ea_size = fname->dup.reparse = 0; + + dsize = le16_to_cpu(new_de->key_size); + asize = QuadAlign(SIZEOF_RESIDENT + dsize); + + attr->type = ATTR_NAME; + attr->size = cpu_to_le32(asize); + attr->res.data_off = SIZEOF_RESIDENT_LE; + attr->res.flags = RESIDENT_FLAG_INDEXED; + attr->id = cpu_to_le16(aid++); + attr->res.data_size = cpu_to_le32(dsize); + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), fname, dsize); + + attr = Add2Ptr(attr, asize); + + if (security_id == SECURITY_ID_INVALID) { + /* Insert security attribute */ + asize = SIZEOF_RESIDENT + QuadAlign(sd_size); + + attr->type = ATTR_SECURE; + attr->size = cpu_to_le32(asize); + attr->id = cpu_to_le16(aid++); + attr->res.data_off = SIZEOF_RESIDENT_LE; + attr->res.data_size = cpu_to_le32(sd_size); + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), sd, sd_size); + + attr = Add2Ptr(attr, asize); + } + + if (fa & FILE_ATTRIBUTE_DIRECTORY) { + /* + * regular directory or symlink to directory + * Create root attribute + */ + dsize = sizeof(struct INDEX_ROOT) + sizeof(struct NTFS_DE); + asize = sizeof(I30_NAME) + SIZEOF_RESIDENT + dsize; + + attr->type = ATTR_ROOT; + attr->size = cpu_to_le32(asize); + attr->id = cpu_to_le16(aid++); + + attr->name_len = ARRAY_SIZE(I30_NAME); + attr->name_off = SIZEOF_RESIDENT_LE; + attr->res.data_off = + cpu_to_le16(sizeof(I30_NAME) + SIZEOF_RESIDENT); + attr->res.data_size = cpu_to_le32(dsize); + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), I30_NAME, + sizeof(I30_NAME)); + + root = Add2Ptr(attr, sizeof(I30_NAME) + SIZEOF_RESIDENT); + memcpy(root, dir_root, offsetof(struct INDEX_ROOT, ihdr)); + root->ihdr.de_off = + cpu_to_le32(sizeof(struct INDEX_HDR)); // 0x10 + root->ihdr.used = cpu_to_le32(sizeof(struct INDEX_HDR) + + sizeof(struct NTFS_DE)); + root->ihdr.total = root->ihdr.used; + + e = Add2Ptr(root, sizeof(struct INDEX_ROOT)); + e->size = cpu_to_le16(sizeof(struct NTFS_DE)); + e->flags = NTFS_IE_LAST; + } else if (is_link) { + /* + * symlink to file + * Create empty resident data attribute + */ + asize = SIZEOF_RESIDENT; + + /* insert empty ATTR_DATA */ + attr->type = ATTR_DATA; + attr->size = cpu_to_le32(SIZEOF_RESIDENT); + attr->id = cpu_to_le16(aid++); + attr->name_off = SIZEOF_RESIDENT_LE; + attr->res.data_off = SIZEOF_RESIDENT_LE; + } else { + /* + * regular file + */ + attr->type = ATTR_DATA; + attr->id = cpu_to_le16(aid++); + /* Create empty non resident data attribute */ + attr->non_res = 1; + attr->nres.evcn = cpu_to_le64(-1ll); + if (fa & FILE_ATTRIBUTE_SPARSE_FILE) { + attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8); + attr->name_off = SIZEOF_NONRESIDENT_EX_LE; + attr->flags = ATTR_FLAG_SPARSED; + asize = SIZEOF_NONRESIDENT_EX + 8; + } else if (fa & FILE_ATTRIBUTE_COMPRESSED) { + attr->size = cpu_to_le32(SIZEOF_NONRESIDENT_EX + 8); + attr->name_off = SIZEOF_NONRESIDENT_EX_LE; + attr->flags = ATTR_FLAG_COMPRESSED; + attr->nres.c_unit = COMPRESSION_UNIT; + asize = SIZEOF_NONRESIDENT_EX + 8; + } else { + attr->size = cpu_to_le32(SIZEOF_NONRESIDENT + 8); + attr->name_off = SIZEOF_NONRESIDENT_LE; + asize = SIZEOF_NONRESIDENT + 8; + } + attr->nres.run_off = attr->name_off; + } + + if (is_dir) { + ni->ni_flags |= NI_FLAG_DIR; + err = indx_init(&ni->dir, sbi, attr, INDEX_MUTEX_I30); + if (err) + goto out4; + } else if (is_link) { + rp = ntfs_create_reparse_buffer(sbi, symname, size, &nsize); + + if (IS_ERR(rp)) { + err = PTR_ERR(rp); + rp = NULL; + goto out4; + } + + /* + * Insert ATTR_REPARSE + */ + attr = Add2Ptr(attr, asize); + attr->type = ATTR_REPARSE; + attr->id = cpu_to_le16(aid++); + + /* resident or non resident? */ + asize = QuadAlign(SIZEOF_RESIDENT + nsize); + t16 = PtrOffset(rec, attr); + + if (asize + t16 + 8 > sbi->record_size) { + CLST alen; + CLST clst = bytes_to_cluster(sbi, nsize); + + /* bytes per runs */ + t16 = sbi->record_size - t16 - SIZEOF_NONRESIDENT; + + attr->non_res = 1; + attr->nres.evcn = cpu_to_le64(clst - 1); + attr->name_off = SIZEOF_NONRESIDENT_LE; + attr->nres.run_off = attr->name_off; + attr->nres.data_size = cpu_to_le64(nsize); + attr->nres.valid_size = attr->nres.data_size; + attr->nres.alloc_size = + cpu_to_le64(ntfs_up_cluster(sbi, nsize)); + + err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0, + clst, NULL, 0, &alen, 0, + NULL); + if (err) + goto out5; + + err = run_pack(&ni->file.run, 0, clst, + Add2Ptr(attr, SIZEOF_NONRESIDENT), t16, + &vcn); + if (err < 0) + goto out5; + + if (vcn != clst) { + err = -EINVAL; + goto out5; + } + + asize = SIZEOF_NONRESIDENT + QuadAlign(err); + inode->i_size = nsize; + } else { + attr->res.data_off = SIZEOF_RESIDENT_LE; + attr->res.data_size = cpu_to_le32(nsize); + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), rp, nsize); + inode->i_size = nsize; + nsize = 0; + } + + attr->size = cpu_to_le32(asize); + + err = ntfs_insert_reparse(sbi, IO_REPARSE_TAG_SYMLINK, + &new_de->ref); + if (err) + goto out5; + + rp_inserted = true; + } + + attr = Add2Ptr(attr, asize); + attr->type = ATTR_END; + + rec->used = cpu_to_le32(PtrOffset(rec, attr) + 8); + rec->next_attr_id = cpu_to_le16(aid); + + /* Step 2: Add new name in index */ + err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, fnd); + if (err) + goto out6; + + /* Update current directory record */ + mark_inode_dirty(dir); + + /* Fill vfs inode fields */ + inode->i_uid = sbi->options.uid ? sbi->options.fs_uid : current_fsuid(); + inode->i_gid = sbi->options.gid ? sbi->options.fs_gid + : (dir->i_mode & S_ISGID) ? dir->i_gid + : current_fsgid(); + inode->i_generation = le16_to_cpu(rec->seq); + + dir->i_mtime = dir->i_ctime = inode->i_atime; + + if (is_dir) { + if (dir->i_mode & S_ISGID) + mode |= S_ISGID; + inode->i_op = &ntfs_dir_inode_operations; + inode->i_fop = &ntfs_dir_operations; + } else if (is_link) { + inode->i_op = &ntfs_link_inode_operations; + inode->i_fop = NULL; + inode->i_mapping->a_ops = &ntfs_aops; + } else { + inode->i_op = &ntfs_file_inode_operations; + inode->i_fop = &ntfs_file_operations; + inode->i_mapping->a_ops = + is_compressed(ni) ? &ntfs_aops_cmpr : &ntfs_aops; + init_rwsem(&ni->file.run_lock); + } + + inode->i_mode = mode; + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if (!is_link && (sb->s_flags & SB_POSIXACL)) { + err = ntfs_init_acl(inode, dir); + if (err) + goto out6; + } else +#endif + { + inode->i_flags |= S_NOSEC; + } + + /* Write non resident data */ + if (nsize) { + err = ntfs_sb_write_run(sbi, &ni->file.run, 0, rp, nsize); + if (err) + goto out7; + } + + /* call 'd_instantiate' after inode->i_op is set but before finish_open */ + d_instantiate(dentry, inode); + + mark_inode_dirty(inode); + mark_inode_dirty(dir); + + /* normal exit */ + goto out2; + +out7: + + /* undo 'indx_insert_entry' */ + indx_delete_entry(&dir_ni->dir, dir_ni, new_de + 1, + le16_to_cpu(new_de->key_size), sbi); +out6: + if (rp_inserted) + ntfs_remove_reparse(sbi, IO_REPARSE_TAG_SYMLINK, &new_de->ref); + +out5: + if (is_dir || run_is_empty(&ni->file.run)) + goto out4; + + run_deallocate(sbi, &ni->file.run, false); + +out4: + clear_rec_inuse(rec); + clear_nlink(inode); + ni->mi.dirty = false; + discard_new_inode(inode); +out3: + ntfs_mark_rec_free(sbi, ino); + +out2: + __putname(new_de); + ntfs_free(rp); + +out1: + if (err) + return ERR_PTR(err); + + unlock_new_inode(inode); + + return inode; +} + +int ntfs_link_inode(struct inode *inode, struct dentry *dentry) +{ + int err; + struct inode *dir = d_inode(dentry->d_parent); + struct ntfs_inode *dir_ni = ntfs_i(dir); + struct ntfs_inode *ni = ntfs_i(inode); + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + const struct qstr *name = &dentry->d_name; + struct NTFS_DE *new_de = NULL; + struct ATTR_FILE_NAME *fname; + struct ATTRIB *attr; + u16 key_size; + struct INDEX_ROOT *dir_root; + + dir_root = indx_get_root(&dir_ni->dir, dir_ni, NULL, NULL); + if (!dir_root) + return -EINVAL; + + /* allocate PATH_MAX bytes */ + new_de = __getname(); + if (!new_de) + return -ENOMEM; + + /*mark rw ntfs as dirty. it will be cleared at umount*/ + ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY); + + // Insert file name + err = fill_name_de(sbi, new_de, name, NULL); + if (err) + goto out; + + key_size = le16_to_cpu(new_de->key_size); + err = ni_insert_resident(ni, key_size, ATTR_NAME, NULL, 0, &attr, NULL); + if (err) + goto out; + + mi_get_ref(&ni->mi, &new_de->ref); + + fname = (struct ATTR_FILE_NAME *)(new_de + 1); + mi_get_ref(&dir_ni->mi, &fname->home); + fname->dup.cr_time = fname->dup.m_time = fname->dup.c_time = + fname->dup.a_time = kernel2nt(&inode->i_ctime); + fname->dup.alloc_size = fname->dup.data_size = 0; + fname->dup.fa = ni->std_fa; + fname->dup.ea_size = fname->dup.reparse = 0; + + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), fname, key_size); + + err = indx_insert_entry(&dir_ni->dir, dir_ni, new_de, sbi, NULL); + if (err) + goto out; + + le16_add_cpu(&ni->mi.mrec->hard_links, 1); + ni->mi.dirty = true; + +out: + __putname(new_de); + return err; +} + +/* + * ntfs_unlink_inode + * + * inode_operations::unlink + * inode_operations::rmdir + */ +int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry) +{ + int err; + struct super_block *sb = dir->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct inode *inode = d_inode(dentry); + struct ntfs_inode *ni = ntfs_i(inode); + const struct qstr *name = &dentry->d_name; + struct ntfs_inode *dir_ni = ntfs_i(dir); + struct ntfs_index *indx = &dir_ni->dir; + struct cpu_str *uni = NULL; + struct ATTR_FILE_NAME *fname; + u8 name_type; + struct ATTR_LIST_ENTRY *le; + struct MFT_REF ref; + bool is_dir = S_ISDIR(inode->i_mode); + struct INDEX_ROOT *dir_root; + + dir_root = indx_get_root(indx, dir_ni, NULL, NULL); + if (!dir_root) + return -EINVAL; + + ni_lock(ni); + + if (is_dir && !dir_is_empty(inode)) { + err = -ENOTEMPTY; + goto out1; + } + + if (ntfs_is_meta_file(sbi, inode->i_ino)) { + err = -EINVAL; + goto out1; + } + + /* allocate PATH_MAX bytes */ + uni = __getname(); + if (!uni) { + err = -ENOMEM; + goto out1; + } + + /* Convert input string to unicode */ + err = ntfs_nls_to_utf16(sbi, name->name, name->len, uni, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (err < 0) + goto out2; + + /*mark rw ntfs as dirty. it will be cleared at umount*/ + ntfs_set_state(sbi, NTFS_DIRTY_DIRTY); + + /* find name in record */ + mi_get_ref(&dir_ni->mi, &ref); + + le = NULL; + fname = ni_fname_name(ni, uni, &ref, &le); + if (!fname) { + err = -ENOENT; + goto out3; + } + + name_type = paired_name(fname->type); + + err = indx_delete_entry(indx, dir_ni, fname, fname_full_size(fname), + sbi); + if (err) + goto out3; + + /* Then remove name from mft */ + ni_remove_attr_le(ni, attr_from_name(fname), le); + + le16_add_cpu(&ni->mi.mrec->hard_links, -1); + ni->mi.dirty = true; + + if (name_type != FILE_NAME_POSIX) { + /* Now we should delete name by type */ + fname = ni_fname_type(ni, name_type, &le); + if (fname) { + err = indx_delete_entry(indx, dir_ni, fname, + fname_full_size(fname), sbi); + if (err) + goto out3; + + ni_remove_attr_le(ni, attr_from_name(fname), le); + + le16_add_cpu(&ni->mi.mrec->hard_links, -1); + } + } +out3: + switch (err) { + case 0: + drop_nlink(inode); + case -ENOTEMPTY: + case -ENOSPC: + case -EROFS: + break; + default: + make_bad_inode(inode); + } + + dir->i_mtime = dir->i_ctime = current_time(dir); + mark_inode_dirty(dir); + inode->i_ctime = dir->i_ctime; + if (inode->i_nlink) + mark_inode_dirty(inode); + +out2: + __putname(uni); +out1: + ni_unlock(ni); + return err; +} + +void ntfs_evict_inode(struct inode *inode) +{ + truncate_inode_pages_final(&inode->i_data); + + if (inode->i_nlink) + _ni_write_inode(inode, inode_needs_sync(inode)); + + invalidate_inode_buffers(inode); + clear_inode(inode); + + ni_clear(ntfs_i(inode)); +} + +static noinline int ntfs_readlink_hlp(struct inode *inode, char *buffer, + int buflen) +{ + int i, err = 0; + struct ntfs_inode *ni = ntfs_i(inode); + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + u64 i_size = inode->i_size; + u16 nlen = 0; + void *to_free = NULL; + struct REPARSE_DATA_BUFFER *rp; + struct le_str *uni; + struct ATTRIB *attr; + + /* Reparse data present. Try to parse it */ + static_assert(!offsetof(struct REPARSE_DATA_BUFFER, ReparseTag)); + static_assert(sizeof(u32) == sizeof(rp->ReparseTag)); + + *buffer = 0; + + /* Read into temporal buffer */ + if (i_size > sbi->reparse.max_size || i_size <= sizeof(u32)) { + err = -EINVAL; + goto out; + } + + attr = ni_find_attr(ni, NULL, NULL, ATTR_REPARSE, NULL, 0, NULL, NULL); + if (!attr) { + err = -EINVAL; + goto out; + } + + if (!attr->non_res) { + rp = resident_data_ex(attr, i_size); + if (!rp) { + err = -EINVAL; + goto out; + } + } else { + rp = ntfs_malloc(i_size); + if (!rp) { + err = -ENOMEM; + goto out; + } + to_free = rp; + err = ntfs_read_run_nb(sbi, &ni->file.run, 0, rp, i_size, NULL); + if (err) + goto out; + } + + err = -EINVAL; + + /* Microsoft Tag */ + switch (rp->ReparseTag) { + case IO_REPARSE_TAG_MOUNT_POINT: + /* Mount points and junctions */ + /* Can we use 'Rp->MountPointReparseBuffer.PrintNameLength'? */ + if (i_size <= offsetof(struct REPARSE_DATA_BUFFER, + MountPointReparseBuffer.PathBuffer)) + goto out; + uni = Add2Ptr(rp, + offsetof(struct REPARSE_DATA_BUFFER, + MountPointReparseBuffer.PathBuffer) + + le16_to_cpu(rp->MountPointReparseBuffer + .PrintNameOffset) - + 2); + nlen = le16_to_cpu(rp->MountPointReparseBuffer.PrintNameLength); + break; + + case IO_REPARSE_TAG_SYMLINK: + /* FolderSymbolicLink */ + /* Can we use 'Rp->SymbolicLinkReparseBuffer.PrintNameLength'? */ + if (i_size <= offsetof(struct REPARSE_DATA_BUFFER, + SymbolicLinkReparseBuffer.PathBuffer)) + goto out; + uni = Add2Ptr(rp, + offsetof(struct REPARSE_DATA_BUFFER, + SymbolicLinkReparseBuffer.PathBuffer) + + le16_to_cpu(rp->SymbolicLinkReparseBuffer + .PrintNameOffset) - + 2); + nlen = le16_to_cpu( + rp->SymbolicLinkReparseBuffer.PrintNameLength); + break; + + case IO_REPARSE_TAG_CLOUD: + case IO_REPARSE_TAG_CLOUD_1: + case IO_REPARSE_TAG_CLOUD_2: + case IO_REPARSE_TAG_CLOUD_3: + case IO_REPARSE_TAG_CLOUD_4: + case IO_REPARSE_TAG_CLOUD_5: + case IO_REPARSE_TAG_CLOUD_6: + case IO_REPARSE_TAG_CLOUD_7: + case IO_REPARSE_TAG_CLOUD_8: + case IO_REPARSE_TAG_CLOUD_9: + case IO_REPARSE_TAG_CLOUD_A: + case IO_REPARSE_TAG_CLOUD_B: + case IO_REPARSE_TAG_CLOUD_C: + case IO_REPARSE_TAG_CLOUD_D: + case IO_REPARSE_TAG_CLOUD_E: + case IO_REPARSE_TAG_CLOUD_F: + err = sizeof("OneDrive") - 1; + if (err > buflen) + err = buflen; + memcpy(buffer, "OneDrive", err); + goto out; + + default: + if (IsReparseTagMicrosoft(rp->ReparseTag)) { + /* unknown Microsoft Tag */ + goto out; + } + if (!IsReparseTagNameSurrogate(rp->ReparseTag) || + i_size <= sizeof(struct REPARSE_POINT)) { + goto out; + } + + /* Users tag */ + uni = Add2Ptr(rp, sizeof(struct REPARSE_POINT) - 2); + nlen = le16_to_cpu(rp->ReparseDataLength) - + sizeof(struct REPARSE_POINT); + } + + /* Convert nlen from bytes to UNICODE chars */ + nlen >>= 1; + + /* Check that name is available */ + if (!nlen || &uni->name[nlen] > (__le16 *)Add2Ptr(rp, i_size)) + goto out; + + /* If name is already zero terminated then truncate it now */ + if (!uni->name[nlen - 1]) + nlen -= 1; + uni->len = nlen; + + err = ntfs_utf16_to_nls(sbi, uni, buffer, buflen); + + if (err < 0) + goto out; + + /* translate windows '\' into linux '/' */ + for (i = 0; i < err; i++) { + if (buffer[i] == '\\') + buffer[i] = '/'; + } + + /* Always set last zero */ + buffer[err] = 0; +out: + ntfs_free(to_free); + return err; +} + +static const char *ntfs_get_link(struct dentry *de, struct inode *inode, + struct delayed_call *done) +{ + int err; + char *ret; + + if (!de) + return ERR_PTR(-ECHILD); + + ret = kmalloc(PAGE_SIZE, GFP_NOFS); + if (!ret) + return ERR_PTR(-ENOMEM); + + err = ntfs_readlink_hlp(inode, ret, PAGE_SIZE); + if (err < 0) { + kfree(ret); + return ERR_PTR(err); + } + + set_delayed_call(done, kfree_link, ret); + + return ret; +} + +const struct inode_operations ntfs_link_inode_operations = { + .get_link = ntfs_get_link, + .setattr = ntfs3_setattr, + .listxattr = ntfs_listxattr, + .permission = ntfs_permission, + .get_acl = ntfs_get_acl, + .set_acl = ntfs_set_acl, +}; + +const struct address_space_operations ntfs_aops = { + .readpage = ntfs_readpage, + .readahead = ntfs_readahead, + .writepage = ntfs_writepage, + .writepages = ntfs_writepages, + .write_begin = ntfs_write_begin, + .write_end = ntfs_write_end, + .direct_IO = ntfs_direct_IO, + .bmap = ntfs_bmap, +}; + +const struct address_space_operations ntfs_aops_cmpr = { + .readpage = ntfs_readpage, + .readahead = ntfs_readahead, +}; diff --git a/fs/ntfs3/lib/decompress_common.c b/fs/ntfs3/lib/decompress_common.c new file mode 100644 index 000000000000..83c9e93aea77 --- /dev/null +++ b/fs/ntfs3/lib/decompress_common.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * decompress_common.c - Code shared by the XPRESS and LZX decompressors + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "decompress_common.h" + +/* + * make_huffman_decode_table() - + * + * Build a decoding table for a canonical prefix code, or "Huffman code". + * + * This is an internal function, not part of the library API! + * + * This takes as input the length of the codeword for each symbol in the + * alphabet and produces as output a table that can be used for fast + * decoding of prefix-encoded symbols using read_huffsym(). + * + * Strictly speaking, a canonical prefix code might not be a Huffman + * code. But this algorithm will work either way; and in fact, since + * Huffman codes are defined in terms of symbol frequencies, there is no + * way for the decompressor to know whether the code is a true Huffman + * code or not until all symbols have been decoded. + * + * Because the prefix code is assumed to be "canonical", it can be + * reconstructed directly from the codeword lengths. A prefix code is + * canonical if and only if a longer codeword never lexicographically + * precedes a shorter codeword, and the lexicographic ordering of + * codewords of the same length is the same as the lexicographic ordering + * of the corresponding symbols. Consequently, we can sort the symbols + * primarily by codeword length and secondarily by symbol value, then + * reconstruct the prefix code by generating codewords lexicographically + * in that order. + * + * This function does not, however, generate the prefix code explicitly. + * Instead, it directly builds a table for decoding symbols using the + * code. The basic idea is this: given the next 'max_codeword_len' bits + * in the input, we can look up the decoded symbol by indexing a table + * containing 2**max_codeword_len entries. A codeword with length + * 'max_codeword_len' will have exactly one entry in this table, whereas + * a codeword shorter than 'max_codeword_len' will have multiple entries + * in this table. Precisely, a codeword of length n will be represented + * by 2**(max_codeword_len - n) entries in this table. The 0-based index + * of each such entry will contain the corresponding codeword as a prefix + * when zero-padded on the left to 'max_codeword_len' binary digits. + * + * That's the basic idea, but we implement two optimizations regarding + * the format of the decode table itself: + * + * - For many compression formats, the maximum codeword length is too + * long for it to be efficient to build the full decoding table + * whenever a new prefix code is used. Instead, we can build the table + * using only 2**table_bits entries, where 'table_bits' is some number + * less than or equal to 'max_codeword_len'. Then, only codewords of + * length 'table_bits' and shorter can be directly looked up. For + * longer codewords, the direct lookup instead produces the root of a + * binary tree. Using this tree, the decoder can do traditional + * bit-by-bit decoding of the remainder of the codeword. Child nodes + * are allocated in extra entries at the end of the table; leaf nodes + * contain symbols. Note that the long-codeword case is, in general, + * not performance critical, since in Huffman codes the most frequently + * used symbols are assigned the shortest codeword lengths. + * + * - When we decode a symbol using a direct lookup of the table, we still + * need to know its length so that the bitstream can be advanced by the + * appropriate number of bits. The simple solution is to simply retain + * the 'lens' array and use the decoded symbol as an index into it. + * However, this requires two separate array accesses in the fast path. + * The optimization is to store the length directly in the decode + * table. We use the bottom 11 bits for the symbol and the top 5 bits + * for the length. In addition, to combine this optimization with the + * previous one, we introduce a special case where the top 2 bits of + * the length are both set if the entry is actually the root of a + * binary tree. + * + * @decode_table: + * The array in which to create the decoding table. This must have + * a length of at least ((2**table_bits) + 2 * num_syms) entries. + * + * @num_syms: + * The number of symbols in the alphabet; also, the length of the + * 'lens' array. Must be less than or equal to 2048. + * + * @table_bits: + * The order of the decode table size, as explained above. Must be + * less than or equal to 13. + * + * @lens: + * An array of length @num_syms, indexable by symbol, that gives the + * length of the codeword, in bits, for that symbol. The length can + * be 0, which means that the symbol does not have a codeword + * assigned. + * + * @max_codeword_len: + * The longest codeword length allowed in the compression format. + * All entries in 'lens' must be less than or equal to this value. + * This must be less than or equal to 23. + * + * @working_space + * A temporary array of length '2 * (max_codeword_len + 1) + + * num_syms'. + * + * Returns 0 on success, or -1 if the lengths do not form a valid prefix + * code. + */ +int make_huffman_decode_table(u16 decode_table[], const u32 num_syms, + const u32 table_bits, const u8 lens[], + const u32 max_codeword_len, + u16 working_space[]) +{ + const u32 table_num_entries = 1 << table_bits; + u16 * const len_counts = &working_space[0]; + u16 * const offsets = &working_space[1 * (max_codeword_len + 1)]; + u16 * const sorted_syms = &working_space[2 * (max_codeword_len + 1)]; + int left; + void *decode_table_ptr; + u32 sym_idx; + u32 codeword_len; + u32 stores_per_loop; + u32 decode_table_pos; + u32 len; + u32 sym; + + /* Count how many symbols have each possible codeword length. + * Note that a length of 0 indicates the corresponding symbol is not + * used in the code and therefore does not have a codeword. + */ + for (len = 0; len <= max_codeword_len; len++) + len_counts[len] = 0; + for (sym = 0; sym < num_syms; sym++) + len_counts[lens[sym]]++; + + /* We can assume all lengths are <= max_codeword_len, but we + * cannot assume they form a valid prefix code. A codeword of + * length n should require a proportion of the codespace equaling + * (1/2)^n. The code is valid if and only if the codespace is + * exactly filled by the lengths, by this measure. + */ + left = 1; + for (len = 1; len <= max_codeword_len; len++) { + left <<= 1; + left -= len_counts[len]; + if (left < 0) { + /* The lengths overflow the codespace; that is, the code + * is over-subscribed. + */ + return -1; + } + } + + if (left) { + /* The lengths do not fill the codespace; that is, they form an + * incomplete set. + */ + if (left == (1 << max_codeword_len)) { + /* The code is completely empty. This is arguably + * invalid, but in fact it is valid in LZX and XPRESS, + * so we must allow it. By definition, no symbols can + * be decoded with an empty code. Consequently, we + * technically don't even need to fill in the decode + * table. However, to avoid accessing uninitialized + * memory if the algorithm nevertheless attempts to + * decode symbols using such a code, we zero out the + * decode table. + */ + memset(decode_table, 0, + table_num_entries * sizeof(decode_table[0])); + return 0; + } + return -1; + } + + /* Sort the symbols primarily by length and secondarily by symbol order. + */ + + /* Initialize 'offsets' so that offsets[len] for 1 <= len <= + * max_codeword_len is the number of codewords shorter than 'len' bits. + */ + offsets[1] = 0; + for (len = 1; len < max_codeword_len; len++) + offsets[len + 1] = offsets[len] + len_counts[len]; + + /* Use the 'offsets' array to sort the symbols. Note that we do not + * include symbols that are not used in the code. Consequently, fewer + * than 'num_syms' entries in 'sorted_syms' may be filled. + */ + for (sym = 0; sym < num_syms; sym++) + if (lens[sym]) + sorted_syms[offsets[lens[sym]]++] = sym; + + /* Fill entries for codewords with length <= table_bits + * --- that is, those short enough for a direct mapping. + * + * The table will start with entries for the shortest codeword(s), which + * have the most entries. From there, the number of entries per + * codeword will decrease. + */ + decode_table_ptr = decode_table; + sym_idx = 0; + codeword_len = 1; + stores_per_loop = (1 << (table_bits - codeword_len)); + for (; stores_per_loop != 0; codeword_len++, stores_per_loop >>= 1) { + u32 end_sym_idx = sym_idx + len_counts[codeword_len]; + + for (; sym_idx < end_sym_idx; sym_idx++) { + u16 entry; + u16 *p; + u32 n; + + entry = ((u32)codeword_len << 11) | sorted_syms[sym_idx]; + p = (u16 *)decode_table_ptr; + n = stores_per_loop; + + do { + *p++ = entry; + } while (--n); + + decode_table_ptr = p; + } + } + + /* If we've filled in the entire table, we are done. Otherwise, + * there are codewords longer than table_bits for which we must + * generate binary trees. + */ + decode_table_pos = (u16 *)decode_table_ptr - decode_table; + if (decode_table_pos != table_num_entries) { + u32 j; + u32 next_free_tree_slot; + u32 cur_codeword; + + /* First, zero out the remaining entries. This is + * necessary so that these entries appear as + * "unallocated" in the next part. Each of these entries + * will eventually be filled with the representation of + * the root node of a binary tree. + */ + j = decode_table_pos; + do { + decode_table[j] = 0; + } while (++j != table_num_entries); + + /* We allocate child nodes starting at the end of the + * direct lookup table. Note that there should be + * 2*num_syms extra entries for this purpose, although + * fewer than this may actually be needed. + */ + next_free_tree_slot = table_num_entries; + + /* Iterate through each codeword with length greater than + * 'table_bits', primarily in order of codeword length + * and secondarily in order of symbol. + */ + for (cur_codeword = decode_table_pos << 1; + codeword_len <= max_codeword_len; + codeword_len++, cur_codeword <<= 1) { + u32 end_sym_idx = sym_idx + len_counts[codeword_len]; + + for (; sym_idx < end_sym_idx; sym_idx++, cur_codeword++) { + /* 'sorted_sym' is the symbol represented by the + * codeword. + */ + u32 sorted_sym = sorted_syms[sym_idx]; + u32 extra_bits = codeword_len - table_bits; + u32 node_idx = cur_codeword >> extra_bits; + + /* Go through each bit of the current codeword + * beyond the prefix of length @table_bits and + * walk the appropriate binary tree, allocating + * any slots that have not yet been allocated. + * + * Note that the 'pointer' entry to the binary + * tree, which is stored in the direct lookup + * portion of the table, is represented + * identically to other internal (non-leaf) + * nodes of the binary tree; it can be thought + * of as simply the root of the tree. The + * representation of these internal nodes is + * simply the index of the left child combined + * with the special bits 0xC000 to distingush + * the entry from direct mapping and leaf node + * entries. + */ + do { + /* At least one bit remains in the + * codeword, but the current node is an + * unallocated leaf. Change it to an + * internal node. + */ + if (decode_table[node_idx] == 0) { + decode_table[node_idx] = + next_free_tree_slot | 0xC000; + decode_table[next_free_tree_slot++] = 0; + decode_table[next_free_tree_slot++] = 0; + } + + /* Go to the left child if the next bit + * in the codeword is 0; otherwise go to + * the right child. + */ + node_idx = decode_table[node_idx] & 0x3FFF; + --extra_bits; + node_idx += (cur_codeword >> extra_bits) & 1; + } while (extra_bits != 0); + + /* We've traversed the tree using the entire + * codeword, and we're now at the entry where + * the actual symbol will be stored. This is + * distinguished from internal nodes by not + * having its high two bits set. + */ + decode_table[node_idx] = sorted_sym; + } + } + } + return 0; +} diff --git a/fs/ntfs3/lib/decompress_common.h b/fs/ntfs3/lib/decompress_common.h new file mode 100644 index 000000000000..66297f398403 --- /dev/null +++ b/fs/ntfs3/lib/decompress_common.h @@ -0,0 +1,352 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +/* + * decompress_common.h - Code shared by the XPRESS and LZX decompressors + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include +#include +#include +#include +#include + + +/* "Force inline" macro (not required, but helpful for performance) */ +#define forceinline __always_inline + +/* Enable whole-word match copying on selected architectures */ +#if defined(__i386__) || defined(__x86_64__) || defined(__ARM_FEATURE_UNALIGNED) +# define FAST_UNALIGNED_ACCESS +#endif + +/* Size of a machine word */ +#define WORDBYTES (sizeof(size_t)) + +static forceinline void +copy_unaligned_word(const void *src, void *dst) +{ + put_unaligned(get_unaligned((const size_t *)src), (size_t *)dst); +} + + +/* Generate a "word" with platform-dependent size whose bytes all contain the + * value 'b'. + */ +static forceinline size_t repeat_byte(u8 b) +{ + size_t v; + + v = b; + v |= v << 8; + v |= v << 16; + v |= v << ((WORDBYTES == 8) ? 32 : 0); + return v; +} + +/* Structure that encapsulates a block of in-memory data being interpreted as a + * stream of bits, optionally with interwoven literal bytes. Bits are assumed + * to be stored in little endian 16-bit coding units, with the bits ordered high + * to low. + */ +struct input_bitstream { + + /* Bits that have been read from the input buffer. The bits are + * left-justified; the next bit is always bit 31. + */ + u32 bitbuf; + + /* Number of bits currently held in @bitbuf. */ + u32 bitsleft; + + /* Pointer to the next byte to be retrieved from the input buffer. */ + const u8 *next; + + /* Pointer to just past the end of the input buffer. */ + const u8 *end; +}; + +/* Initialize a bitstream to read from the specified input buffer. */ +static forceinline void init_input_bitstream(struct input_bitstream *is, + const void *buffer, u32 size) +{ + is->bitbuf = 0; + is->bitsleft = 0; + is->next = buffer; + is->end = is->next + size; +} + +/* Ensure the bit buffer variable for the bitstream contains at least @num_bits + * bits. Following this, bitstream_peek_bits() and/or bitstream_remove_bits() + * may be called on the bitstream to peek or remove up to @num_bits bits. Note + * that @num_bits must be <= 16. + */ +static forceinline void bitstream_ensure_bits(struct input_bitstream *is, + u32 num_bits) +{ + if (is->bitsleft < num_bits) { + if (is->end - is->next >= 2) { + is->bitbuf |= (u32)get_unaligned_le16(is->next) + << (16 - is->bitsleft); + is->next += 2; + } + is->bitsleft += 16; + } +} + +/* Return the next @num_bits bits from the bitstream, without removing them. + * There must be at least @num_bits remaining in the buffer variable, from a + * previous call to bitstream_ensure_bits(). + */ +static forceinline u32 +bitstream_peek_bits(const struct input_bitstream *is, const u32 num_bits) +{ + return (is->bitbuf >> 1) >> (sizeof(is->bitbuf) * 8 - num_bits - 1); +} + +/* Remove @num_bits from the bitstream. There must be at least @num_bits + * remaining in the buffer variable, from a previous call to + * bitstream_ensure_bits(). + */ +static forceinline void +bitstream_remove_bits(struct input_bitstream *is, u32 num_bits) +{ + is->bitbuf <<= num_bits; + is->bitsleft -= num_bits; +} + +/* Remove and return @num_bits bits from the bitstream. There must be at least + * @num_bits remaining in the buffer variable, from a previous call to + * bitstream_ensure_bits(). + */ +static forceinline u32 +bitstream_pop_bits(struct input_bitstream *is, u32 num_bits) +{ + u32 bits = bitstream_peek_bits(is, num_bits); + + bitstream_remove_bits(is, num_bits); + return bits; +} + +/* Read and return the next @num_bits bits from the bitstream. */ +static forceinline u32 +bitstream_read_bits(struct input_bitstream *is, u32 num_bits) +{ + bitstream_ensure_bits(is, num_bits); + return bitstream_pop_bits(is, num_bits); +} + +/* Read and return the next literal byte embedded in the bitstream. */ +static forceinline u8 +bitstream_read_byte(struct input_bitstream *is) +{ + if (unlikely(is->end == is->next)) + return 0; + return *is->next++; +} + +/* Read and return the next 16-bit integer embedded in the bitstream. */ +static forceinline u16 +bitstream_read_u16(struct input_bitstream *is) +{ + u16 v; + + if (unlikely(is->end - is->next < 2)) + return 0; + v = get_unaligned_le16(is->next); + is->next += 2; + return v; +} + +/* Read and return the next 32-bit integer embedded in the bitstream. */ +static forceinline u32 +bitstream_read_u32(struct input_bitstream *is) +{ + u32 v; + + if (unlikely(is->end - is->next < 4)) + return 0; + v = get_unaligned_le32(is->next); + is->next += 4; + return v; +} + +/* Read into @dst_buffer an array of literal bytes embedded in the bitstream. + * Return either a pointer to the byte past the last written, or NULL if the + * read overflows the input buffer. + */ +static forceinline void *bitstream_read_bytes(struct input_bitstream *is, + void *dst_buffer, size_t count) +{ + if ((size_t)(is->end - is->next) < count) + return NULL; + memcpy(dst_buffer, is->next, count); + is->next += count; + return (u8 *)dst_buffer + count; +} + +/* Align the input bitstream on a coding-unit boundary. */ +static forceinline void bitstream_align(struct input_bitstream *is) +{ + is->bitsleft = 0; + is->bitbuf = 0; +} + +extern int make_huffman_decode_table(u16 decode_table[], const u32 num_syms, + const u32 num_bits, const u8 lens[], + const u32 max_codeword_len, + u16 working_space[]); + + +/* Reads and returns the next Huffman-encoded symbol from a bitstream. If the + * input data is exhausted, the Huffman symbol is decoded as if the missing bits + * are all zeroes. + */ +static forceinline u32 read_huffsym(struct input_bitstream *istream, + const u16 decode_table[], + u32 table_bits, + u32 max_codeword_len) +{ + u32 entry; + u32 key_bits; + + bitstream_ensure_bits(istream, max_codeword_len); + + /* Index the decode table by the next table_bits bits of the input. */ + key_bits = bitstream_peek_bits(istream, table_bits); + entry = decode_table[key_bits]; + if (entry < 0xC000) { + /* Fast case: The decode table directly provided the + * symbol and codeword length. The low 11 bits are the + * symbol, and the high 5 bits are the codeword length. + */ + bitstream_remove_bits(istream, entry >> 11); + return entry & 0x7FF; + } + /* Slow case: The codeword for the symbol is longer than + * table_bits, so the symbol does not have an entry + * directly in the first (1 << table_bits) entries of the + * decode table. Traverse the appropriate binary tree + * bit-by-bit to decode the symbol. + */ + bitstream_remove_bits(istream, table_bits); + do { + key_bits = (entry & 0x3FFF) + bitstream_pop_bits(istream, 1); + } while ((entry = decode_table[key_bits]) >= 0xC000); + return entry; +} + +/* + * Copy an LZ77 match at (dst - offset) to dst. + * + * The length and offset must be already validated --- that is, (dst - offset) + * can't underrun the output buffer, and (dst + length) can't overrun the output + * buffer. Also, the length cannot be 0. + * + * @bufend points to the byte past the end of the output buffer. This function + * won't write any data beyond this position. + * + * Returns dst + length. + */ +static forceinline u8 *lz_copy(u8 *dst, u32 length, u32 offset, const u8 *bufend, + u32 min_length) +{ + const u8 *src = dst - offset; + + /* + * Try to copy one machine word at a time. On i386 and x86_64 this is + * faster than copying one byte at a time, unless the data is + * near-random and all the matches have very short lengths. Note that + * since this requires unaligned memory accesses, it won't necessarily + * be faster on every architecture. + * + * Also note that we might copy more than the length of the match. For + * example, if a word is 8 bytes and the match is of length 5, then + * we'll simply copy 8 bytes. This is okay as long as we don't write + * beyond the end of the output buffer, hence the check for (bufend - + * end >= WORDBYTES - 1). + */ +#ifdef FAST_UNALIGNED_ACCESS + u8 * const end = dst + length; + + if (bufend - end >= (ptrdiff_t)(WORDBYTES - 1)) { + + if (offset >= WORDBYTES) { + /* The source and destination words don't overlap. */ + + /* To improve branch prediction, one iteration of this + * loop is unrolled. Most matches are short and will + * fail the first check. But if that check passes, then + * it becomes increasing likely that the match is long + * and we'll need to continue copying. + */ + + copy_unaligned_word(src, dst); + src += WORDBYTES; + dst += WORDBYTES; + + if (dst < end) { + do { + copy_unaligned_word(src, dst); + src += WORDBYTES; + dst += WORDBYTES; + } while (dst < end); + } + return end; + } else if (offset == 1) { + + /* Offset 1 matches are equivalent to run-length + * encoding of the previous byte. This case is common + * if the data contains many repeated bytes. + */ + size_t v = repeat_byte(*(dst - 1)); + + do { + put_unaligned(v, (size_t *)dst); + src += WORDBYTES; + dst += WORDBYTES; + } while (dst < end); + return end; + } + /* + * We don't bother with special cases for other 'offset < + * WORDBYTES', which are usually rarer than 'offset == 1'. Extra + * checks will just slow things down. Actually, it's possible + * to handle all the 'offset < WORDBYTES' cases using the same + * code, but it still becomes more complicated doesn't seem any + * faster overall; it definitely slows down the more common + * 'offset == 1' case. + */ + } +#endif /* FAST_UNALIGNED_ACCESS */ + + /* Fall back to a bytewise copy. */ + + if (min_length >= 2) { + *dst++ = *src++; + length--; + } + if (min_length >= 3) { + *dst++ = *src++; + length--; + } + do { + *dst++ = *src++; + } while (--length); + + return dst; +} diff --git a/fs/ntfs3/lib/lib.h b/fs/ntfs3/lib/lib.h new file mode 100644 index 000000000000..f508fbad2e71 --- /dev/null +++ b/fs/ntfs3/lib/lib.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Adapted for linux kernel by Alexander Mamaev: + * - remove implementations of get_unaligned_ + * - assume GCC is always defined + * - ISO C90 + * - linux kernel code style + */ + + +/* globals from xpress_decompress.c */ +struct xpress_decompressor *xpress_allocate_decompressor(void); +void xpress_free_decompressor(struct xpress_decompressor *d); +int xpress_decompress(struct xpress_decompressor *__restrict d, + const void *__restrict compressed_data, + size_t compressed_size, + void *__restrict uncompressed_data, + size_t uncompressed_size); + +/* globals from lzx_decompress.c */ +struct lzx_decompressor *lzx_allocate_decompressor(void); +void lzx_free_decompressor(struct lzx_decompressor *d); +int lzx_decompress(struct lzx_decompressor *__restrict d, + const void *__restrict compressed_data, + size_t compressed_size, void *__restrict uncompressed_data, + size_t uncompressed_size); diff --git a/fs/ntfs3/lib/lzx_decompress.c b/fs/ntfs3/lib/lzx_decompress.c new file mode 100644 index 000000000000..77a381a693d1 --- /dev/null +++ b/fs/ntfs3/lib/lzx_decompress.c @@ -0,0 +1,683 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * lzx_decompress.c - A decompressor for the LZX compression format, which can + * be used in "System Compressed" files. This is based on the code from wimlib. + * This code only supports a window size (dictionary size) of 32768 bytes, since + * this is the only size used in System Compression. + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "decompress_common.h" +#include "lib.h" + +/* Number of literal byte values */ +#define LZX_NUM_CHARS 256 + +/* The smallest and largest allowed match lengths */ +#define LZX_MIN_MATCH_LEN 2 +#define LZX_MAX_MATCH_LEN 257 + +/* Number of distinct match lengths that can be represented */ +#define LZX_NUM_LENS (LZX_MAX_MATCH_LEN - LZX_MIN_MATCH_LEN + 1) + +/* Number of match lengths for which no length symbol is required */ +#define LZX_NUM_PRIMARY_LENS 7 +#define LZX_NUM_LEN_HEADERS (LZX_NUM_PRIMARY_LENS + 1) + +/* Valid values of the 3-bit block type field */ +#define LZX_BLOCKTYPE_VERBATIM 1 +#define LZX_BLOCKTYPE_ALIGNED 2 +#define LZX_BLOCKTYPE_UNCOMPRESSED 3 + +/* Number of offset slots for a window size of 32768 */ +#define LZX_NUM_OFFSET_SLOTS 30 + +/* Number of symbols in the main code for a window size of 32768 */ +#define LZX_MAINCODE_NUM_SYMBOLS \ + (LZX_NUM_CHARS + (LZX_NUM_OFFSET_SLOTS * LZX_NUM_LEN_HEADERS)) + +/* Number of symbols in the length code */ +#define LZX_LENCODE_NUM_SYMBOLS (LZX_NUM_LENS - LZX_NUM_PRIMARY_LENS) + +/* Number of symbols in the precode */ +#define LZX_PRECODE_NUM_SYMBOLS 20 + +/* Number of bits in which each precode codeword length is represented */ +#define LZX_PRECODE_ELEMENT_SIZE 4 + +/* Number of low-order bits of each match offset that are entropy-encoded in + * aligned offset blocks + */ +#define LZX_NUM_ALIGNED_OFFSET_BITS 3 + +/* Number of symbols in the aligned offset code */ +#define LZX_ALIGNEDCODE_NUM_SYMBOLS (1 << LZX_NUM_ALIGNED_OFFSET_BITS) + +/* Mask for the match offset bits that are entropy-encoded in aligned offset + * blocks + */ +#define LZX_ALIGNED_OFFSET_BITMASK ((1 << LZX_NUM_ALIGNED_OFFSET_BITS) - 1) + +/* Number of bits in which each aligned offset codeword length is represented */ +#define LZX_ALIGNEDCODE_ELEMENT_SIZE 3 + +/* Maximum lengths (in bits) of the codewords in each Huffman code */ +#define LZX_MAX_MAIN_CODEWORD_LEN 16 +#define LZX_MAX_LEN_CODEWORD_LEN 16 +#define LZX_MAX_PRE_CODEWORD_LEN ((1 << LZX_PRECODE_ELEMENT_SIZE) - 1) +#define LZX_MAX_ALIGNED_CODEWORD_LEN ((1 << LZX_ALIGNEDCODE_ELEMENT_SIZE) - 1) + +/* The default "filesize" value used in pre/post-processing. In the LZX format + * used in cabinet files this value must be given to the decompressor, whereas + * in the LZX format used in WIM files and system-compressed files this value is + * fixed at 12000000. + */ +#define LZX_DEFAULT_FILESIZE 12000000 + +/* Assumed block size when the encoded block size begins with a 0 bit. */ +#define LZX_DEFAULT_BLOCK_SIZE 32768 + +/* Number of offsets in the recent (or "repeat") offsets queue. */ +#define LZX_NUM_RECENT_OFFSETS 3 + +/* These values are chosen for fast decompression. */ +#define LZX_MAINCODE_TABLEBITS 11 +#define LZX_LENCODE_TABLEBITS 10 +#define LZX_PRECODE_TABLEBITS 6 +#define LZX_ALIGNEDCODE_TABLEBITS 7 + +#define LZX_READ_LENS_MAX_OVERRUN 50 + +/* Mapping: offset slot => first match offset that uses that offset slot. + */ +static const u32 lzx_offset_slot_base[LZX_NUM_OFFSET_SLOTS + 1] = { + 0, 1, 2, 3, 4, /* 0 --- 4 */ + 6, 8, 12, 16, 24, /* 5 --- 9 */ + 32, 48, 64, 96, 128, /* 10 --- 14 */ + 192, 256, 384, 512, 768, /* 15 --- 19 */ + 1024, 1536, 2048, 3072, 4096, /* 20 --- 24 */ + 6144, 8192, 12288, 16384, 24576, /* 25 --- 29 */ + 32768, /* extra */ +}; + +/* Mapping: offset slot => how many extra bits must be read and added to the + * corresponding offset slot base to decode the match offset. + */ +static const u8 lzx_extra_offset_bits[LZX_NUM_OFFSET_SLOTS] = { + 0, 0, 0, 0, 1, + 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, + 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, + 11, 12, 12, 13, 13, +}; + +/* Reusable heap-allocated memory for LZX decompression */ +struct lzx_decompressor { + + /* Huffman decoding tables, and arrays that map symbols to codeword + * lengths + */ + + u16 maincode_decode_table[(1 << LZX_MAINCODE_TABLEBITS) + + (LZX_MAINCODE_NUM_SYMBOLS * 2)]; + u8 maincode_lens[LZX_MAINCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; + + + u16 lencode_decode_table[(1 << LZX_LENCODE_TABLEBITS) + + (LZX_LENCODE_NUM_SYMBOLS * 2)]; + u8 lencode_lens[LZX_LENCODE_NUM_SYMBOLS + LZX_READ_LENS_MAX_OVERRUN]; + + + u16 alignedcode_decode_table[(1 << LZX_ALIGNEDCODE_TABLEBITS) + + (LZX_ALIGNEDCODE_NUM_SYMBOLS * 2)]; + u8 alignedcode_lens[LZX_ALIGNEDCODE_NUM_SYMBOLS]; + + u16 precode_decode_table[(1 << LZX_PRECODE_TABLEBITS) + + (LZX_PRECODE_NUM_SYMBOLS * 2)]; + u8 precode_lens[LZX_PRECODE_NUM_SYMBOLS]; + + /* Temporary space for make_huffman_decode_table() */ + u16 working_space[2 * (1 + LZX_MAX_MAIN_CODEWORD_LEN) + + LZX_MAINCODE_NUM_SYMBOLS]; +}; + +static void undo_e8_translation(void *target, s32 input_pos) +{ + s32 abs_offset, rel_offset; + + abs_offset = get_unaligned_le32(target); + if (abs_offset >= 0) { + if (abs_offset < LZX_DEFAULT_FILESIZE) { + /* "good translation" */ + rel_offset = abs_offset - input_pos; + put_unaligned_le32(rel_offset, target); + } + } else { + if (abs_offset >= -input_pos) { + /* "compensating translation" */ + rel_offset = abs_offset + LZX_DEFAULT_FILESIZE; + put_unaligned_le32(rel_offset, target); + } + } +} + +/* + * Undo the 'E8' preprocessing used in LZX. Before compression, the + * uncompressed data was preprocessed by changing the targets of suspected x86 + * CALL instructions from relative offsets to absolute offsets. After + * match/literal decoding, the decompressor must undo the translation. + */ +static void lzx_postprocess(u8 *data, u32 size) +{ + /* + * A worthwhile optimization is to push the end-of-buffer check into the + * relatively rare E8 case. This is possible if we replace the last six + * bytes of data with E8 bytes; then we are guaranteed to hit an E8 byte + * before reaching end-of-buffer. In addition, this scheme guarantees + * that no translation can begin following an E8 byte in the last 10 + * bytes because a 4-byte offset containing E8 as its high byte is a + * large negative number that is not valid for translation. That is + * exactly what we need. + */ + u8 *tail; + u8 saved_bytes[6]; + u8 *p; + + if (size <= 10) + return; + + tail = &data[size - 6]; + memcpy(saved_bytes, tail, 6); + memset(tail, 0xE8, 6); + p = data; + for (;;) { + while (*p != 0xE8) + p++; + if (p >= tail) + break; + undo_e8_translation(p + 1, p - data); + p += 5; + } + memcpy(tail, saved_bytes, 6); +} + +/* Read a Huffman-encoded symbol using the precode. */ +static forceinline u32 read_presym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->precode_decode_table, + LZX_PRECODE_TABLEBITS, LZX_MAX_PRE_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the main code. */ +static forceinline u32 read_mainsym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->maincode_decode_table, + LZX_MAINCODE_TABLEBITS, LZX_MAX_MAIN_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the length code. */ +static forceinline u32 read_lensym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->lencode_decode_table, + LZX_LENCODE_TABLEBITS, LZX_MAX_LEN_CODEWORD_LEN); +} + +/* Read a Huffman-encoded symbol using the aligned offset code. */ +static forceinline u32 read_alignedsym(const struct lzx_decompressor *d, + struct input_bitstream *is) +{ + return read_huffsym(is, d->alignedcode_decode_table, + LZX_ALIGNEDCODE_TABLEBITS, + LZX_MAX_ALIGNED_CODEWORD_LEN); +} + +/* + * Read the precode from the compressed input bitstream, then use it to decode + * @num_lens codeword length values. + * + * @is: The input bitstream. + * + * @lens: An array that contains the length values from the previous time + * the codeword lengths for this Huffman code were read, or all 0's + * if this is the first time. This array must have at least + * (@num_lens + LZX_READ_LENS_MAX_OVERRUN) entries. + * + * @num_lens: Number of length values to decode. + * + * Returns 0 on success, or -1 if the data was invalid. + */ +static int lzx_read_codeword_lens(struct lzx_decompressor *d, + struct input_bitstream *is, + u8 *lens, u32 num_lens) +{ + u8 *len_ptr = lens; + u8 *lens_end = lens + num_lens; + int i; + + /* Read the lengths of the precode codewords. These are given + * explicitly. + */ + for (i = 0; i < LZX_PRECODE_NUM_SYMBOLS; i++) { + d->precode_lens[i] = + bitstream_read_bits(is, LZX_PRECODE_ELEMENT_SIZE); + } + + /* Make the decoding table for the precode. */ + if (make_huffman_decode_table(d->precode_decode_table, + LZX_PRECODE_NUM_SYMBOLS, + LZX_PRECODE_TABLEBITS, + d->precode_lens, + LZX_MAX_PRE_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Decode the codeword lengths. */ + do { + u32 presym; + u8 len; + + /* Read the next precode symbol. */ + presym = read_presym(d, is); + if (presym < 17) { + /* Difference from old length */ + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + *len_ptr++ = len; + } else { + /* Special RLE values */ + + u32 run_len; + + if (presym == 17) { + /* Run of 0's */ + run_len = 4 + bitstream_read_bits(is, 4); + len = 0; + } else if (presym == 18) { + /* Longer run of 0's */ + run_len = 20 + bitstream_read_bits(is, 5); + len = 0; + } else { + /* Run of identical lengths */ + run_len = 4 + bitstream_read_bits(is, 1); + presym = read_presym(d, is); + if (presym > 17) + return -1; + len = *len_ptr - presym; + if ((s8)len < 0) + len += 17; + } + + do { + *len_ptr++ = len; + } while (--run_len); + /* Worst case overrun is when presym == 18, + * run_len == 20 + 31, and only 1 length was remaining. + * So LZX_READ_LENS_MAX_OVERRUN == 50. + * + * Overrun while reading the first half of maincode_lens + * can corrupt the previous values in the second half. + * This doesn't really matter because the resulting + * lengths will still be in range, and data that + * generates overruns is invalid anyway. + */ + } + } while (len_ptr < lens_end); + + return 0; +} + +/* + * Read the header of an LZX block and save the block type and (uncompressed) + * size in *block_type_ret and *block_size_ret, respectively. + * + * If the block is compressed, also update the Huffman decode @tables with the + * new Huffman codes. If the block is uncompressed, also update the match + * offset @queue with the new match offsets. + * + * Return 0 on success, or -1 if the data was invalid. + */ +static int lzx_read_block_header(struct lzx_decompressor *d, + struct input_bitstream *is, + int *block_type_ret, + u32 *block_size_ret, + u32 recent_offsets[]) +{ + int block_type; + u32 block_size; + int i; + + bitstream_ensure_bits(is, 4); + + /* The first three bits tell us what kind of block it is, and should be + * one of the LZX_BLOCKTYPE_* values. + */ + block_type = bitstream_pop_bits(is, 3); + + /* Read the block size. */ + if (bitstream_pop_bits(is, 1)) { + block_size = LZX_DEFAULT_BLOCK_SIZE; + } else { + block_size = 0; + block_size |= bitstream_read_bits(is, 8); + block_size <<= 8; + block_size |= bitstream_read_bits(is, 8); + } + + switch (block_type) { + + case LZX_BLOCKTYPE_ALIGNED: + + /* Read the aligned offset code and prepare its decode table. + */ + + for (i = 0; i < LZX_ALIGNEDCODE_NUM_SYMBOLS; i++) { + d->alignedcode_lens[i] = + bitstream_read_bits(is, + LZX_ALIGNEDCODE_ELEMENT_SIZE); + } + + if (make_huffman_decode_table(d->alignedcode_decode_table, + LZX_ALIGNEDCODE_NUM_SYMBOLS, + LZX_ALIGNEDCODE_TABLEBITS, + d->alignedcode_lens, + LZX_MAX_ALIGNED_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Fall though, since the rest of the header for aligned offset + * blocks is the same as that for verbatim blocks. + */ + fallthrough; + + case LZX_BLOCKTYPE_VERBATIM: + + /* Read the main code and prepare its decode table. + * + * Note that the codeword lengths in the main code are encoded + * in two parts: one part for literal symbols, and one part for + * match symbols. + */ + + if (lzx_read_codeword_lens(d, is, d->maincode_lens, + LZX_NUM_CHARS)) + return -1; + + if (lzx_read_codeword_lens(d, is, + d->maincode_lens + LZX_NUM_CHARS, + LZX_MAINCODE_NUM_SYMBOLS - LZX_NUM_CHARS)) + return -1; + + if (make_huffman_decode_table(d->maincode_decode_table, + LZX_MAINCODE_NUM_SYMBOLS, + LZX_MAINCODE_TABLEBITS, + d->maincode_lens, + LZX_MAX_MAIN_CODEWORD_LEN, + d->working_space)) + return -1; + + /* Read the length code and prepare its decode table. */ + + if (lzx_read_codeword_lens(d, is, d->lencode_lens, + LZX_LENCODE_NUM_SYMBOLS)) + return -1; + + if (make_huffman_decode_table(d->lencode_decode_table, + LZX_LENCODE_NUM_SYMBOLS, + LZX_LENCODE_TABLEBITS, + d->lencode_lens, + LZX_MAX_LEN_CODEWORD_LEN, + d->working_space)) + return -1; + + break; + + case LZX_BLOCKTYPE_UNCOMPRESSED: + + /* Before reading the three recent offsets from the uncompressed + * block header, the stream must be aligned on a 16-bit + * boundary. But if the stream is *already* aligned, then the + * next 16 bits must be discarded. + */ + bitstream_ensure_bits(is, 1); + bitstream_align(is); + + recent_offsets[0] = bitstream_read_u32(is); + recent_offsets[1] = bitstream_read_u32(is); + recent_offsets[2] = bitstream_read_u32(is); + + /* Offsets of 0 are invalid. */ + if (recent_offsets[0] == 0 || recent_offsets[1] == 0 || + recent_offsets[2] == 0) + return -1; + break; + + default: + /* Unrecognized block type. */ + return -1; + } + + *block_type_ret = block_type; + *block_size_ret = block_size; + return 0; +} + +/* Decompress a block of LZX-compressed data. */ +static int lzx_decompress_block(const struct lzx_decompressor *d, + struct input_bitstream *is, + int block_type, u32 block_size, + u8 * const out_begin, u8 *out_next, + u32 recent_offsets[]) +{ + u8 * const block_end = out_next + block_size; + u32 ones_if_aligned = 0U - (block_type == LZX_BLOCKTYPE_ALIGNED); + + do { + u32 mainsym; + u32 match_len; + u32 match_offset; + u32 offset_slot; + u32 num_extra_bits; + + mainsym = read_mainsym(d, is); + if (mainsym < LZX_NUM_CHARS) { + /* Literal */ + *out_next++ = mainsym; + continue; + } + + /* Match */ + + /* Decode the length header and offset slot. */ + mainsym -= LZX_NUM_CHARS; + match_len = mainsym % LZX_NUM_LEN_HEADERS; + offset_slot = mainsym / LZX_NUM_LEN_HEADERS; + + /* If needed, read a length symbol to decode the full length. */ + if (match_len == LZX_NUM_PRIMARY_LENS) + match_len += read_lensym(d, is); + match_len += LZX_MIN_MATCH_LEN; + + if (offset_slot < LZX_NUM_RECENT_OFFSETS) { + /* Repeat offset */ + + /* Note: This isn't a real LRU queue, since using the R2 + * offset doesn't bump the R1 offset down to R2. This + * quirk allows all 3 recent offsets to be handled by + * the same code. (For R0, the swap is a no-op.) + */ + match_offset = recent_offsets[offset_slot]; + recent_offsets[offset_slot] = recent_offsets[0]; + recent_offsets[0] = match_offset; + } else { + /* Explicit offset */ + + /* Look up the number of extra bits that need to be read + * to decode offsets with this offset slot. + */ + num_extra_bits = lzx_extra_offset_bits[offset_slot]; + + /* Start with the offset slot base value. */ + match_offset = lzx_offset_slot_base[offset_slot]; + + /* In aligned offset blocks, the low-order 3 bits of + * each offset are encoded using the aligned offset + * code. Otherwise, all the extra bits are literal. + */ + + if ((num_extra_bits & ones_if_aligned) >= LZX_NUM_ALIGNED_OFFSET_BITS) { + match_offset += + bitstream_read_bits(is, num_extra_bits - + LZX_NUM_ALIGNED_OFFSET_BITS) + << LZX_NUM_ALIGNED_OFFSET_BITS; + match_offset += read_alignedsym(d, is); + } else { + match_offset += bitstream_read_bits(is, num_extra_bits); + } + + /* Adjust the offset. */ + match_offset -= (LZX_NUM_RECENT_OFFSETS - 1); + + /* Update the recent offsets. */ + recent_offsets[2] = recent_offsets[1]; + recent_offsets[1] = recent_offsets[0]; + recent_offsets[0] = match_offset; + } + + /* Validate the match, then copy it to the current position. */ + + if (match_len > (size_t)(block_end - out_next)) + return -1; + + if (match_offset > (size_t)(out_next - out_begin)) + return -1; + + out_next = lz_copy(out_next, match_len, match_offset, + block_end, LZX_MIN_MATCH_LEN); + + } while (out_next != block_end); + + return 0; +} + +/* + * lzx_allocate_decompressor - Allocate an LZX decompressor + * + * Return the pointer to the decompressor on success, or return NULL and set + * errno on failure. + */ +struct lzx_decompressor *lzx_allocate_decompressor(void) +{ + return kmalloc(sizeof(struct lzx_decompressor), GFP_NOFS); +} + +/* + * lzx_decompress - Decompress a buffer of LZX-compressed data + * + * @decompressor: A decompressor allocated with lzx_allocate_decompressor() + * @compressed_data: The buffer of data to decompress + * @compressed_size: Number of bytes of compressed data + * @uncompressed_data: The buffer in which to store the decompressed data + * @uncompressed_size: The number of bytes the data decompresses into + * + * Return 0 on success, or return -1 and set errno on failure. + */ +int lzx_decompress(struct lzx_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size) +{ + struct lzx_decompressor *d = decompressor; + u8 * const out_begin = uncompressed_data; + u8 *out_next = out_begin; + u8 * const out_end = out_begin + uncompressed_size; + struct input_bitstream is; + u32 recent_offsets[LZX_NUM_RECENT_OFFSETS] = {1, 1, 1}; + int e8_status = 0; + + init_input_bitstream(&is, compressed_data, compressed_size); + + /* Codeword lengths begin as all 0's for delta encoding purposes. */ + memset(d->maincode_lens, 0, LZX_MAINCODE_NUM_SYMBOLS); + memset(d->lencode_lens, 0, LZX_LENCODE_NUM_SYMBOLS); + + /* Decompress blocks until we have all the uncompressed data. */ + + while (out_next != out_end) { + int block_type; + u32 block_size; + + if (lzx_read_block_header(d, &is, &block_type, &block_size, + recent_offsets)) + goto invalid; + + if (block_size < 1 || block_size > (size_t)(out_end - out_next)) + goto invalid; + + if (block_type != LZX_BLOCKTYPE_UNCOMPRESSED) { + + /* Compressed block */ + + if (lzx_decompress_block(d, + &is, + block_type, + block_size, + out_begin, + out_next, + recent_offsets)) + goto invalid; + + e8_status |= d->maincode_lens[0xe8]; + out_next += block_size; + } else { + /* Uncompressed block */ + + out_next = bitstream_read_bytes(&is, out_next, + block_size); + if (!out_next) + goto invalid; + + if (block_size & 1) + bitstream_read_byte(&is); + + e8_status = 1; + } + } + + /* Postprocess the data unless it cannot possibly contain 0xe8 bytes. */ + if (e8_status) + lzx_postprocess(uncompressed_data, uncompressed_size); + + return 0; + +invalid: + return -1; +} + +/* + * lzx_free_decompressor - Free an LZX decompressor + * + * @decompressor: A decompressor that was allocated with + * lzx_allocate_decompressor(), or NULL. + */ +void lzx_free_decompressor(struct lzx_decompressor *decompressor) +{ + kfree(decompressor); +} diff --git a/fs/ntfs3/lib/xpress_decompress.c b/fs/ntfs3/lib/xpress_decompress.c new file mode 100644 index 000000000000..3d98f36a981e --- /dev/null +++ b/fs/ntfs3/lib/xpress_decompress.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * xpress_decompress.c - A decompressor for the XPRESS compression format + * (Huffman variant), which can be used in "System Compressed" files. This is + * based on the code from wimlib. + * + * Copyright (C) 2015 Eric Biggers + * + * This program is free software: you can redistribute it and/or modify it under + * the terms of the GNU General Public License as published by the Free Software + * Foundation, either version 2 of the License, or (at your option) any later + * version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ + +#include "decompress_common.h" +#include "lib.h" + +#define XPRESS_NUM_SYMBOLS 512 +#define XPRESS_MAX_CODEWORD_LEN 15 +#define XPRESS_MIN_MATCH_LEN 3 + +/* This value is chosen for fast decompression. */ +#define XPRESS_TABLEBITS 12 + +/* Reusable heap-allocated memory for XPRESS decompression */ +struct xpress_decompressor { + + /* The Huffman decoding table */ + u16 decode_table[(1 << XPRESS_TABLEBITS) + 2 * XPRESS_NUM_SYMBOLS]; + + /* An array that maps symbols to codeword lengths */ + u8 lens[XPRESS_NUM_SYMBOLS]; + + /* Temporary space for make_huffman_decode_table() */ + u16 working_space[2 * (1 + XPRESS_MAX_CODEWORD_LEN) + + XPRESS_NUM_SYMBOLS]; +}; + +/* + * xpress_allocate_decompressor - Allocate an XPRESS decompressor + * + * Return the pointer to the decompressor on success, or return NULL and set + * errno on failure. + */ +struct xpress_decompressor *xpress_allocate_decompressor(void) +{ + return kmalloc(sizeof(struct xpress_decompressor), GFP_NOFS); +} + +/* + * xpress_decompress - Decompress a buffer of XPRESS-compressed data + * + * @decompressor: A decompressor that was allocated with + * xpress_allocate_decompressor() + * @compressed_data: The buffer of data to decompress + * @compressed_size: Number of bytes of compressed data + * @uncompressed_data: The buffer in which to store the decompressed data + * @uncompressed_size: The number of bytes the data decompresses into + * + * Return 0 on success, or return -1 and set errno on failure. + */ +int xpress_decompress(struct xpress_decompressor *decompressor, + const void *compressed_data, size_t compressed_size, + void *uncompressed_data, size_t uncompressed_size) +{ + struct xpress_decompressor *d = decompressor; + const u8 * const in_begin = compressed_data; + u8 * const out_begin = uncompressed_data; + u8 *out_next = out_begin; + u8 * const out_end = out_begin + uncompressed_size; + struct input_bitstream is; + u32 i; + + /* Read the Huffman codeword lengths. */ + if (compressed_size < XPRESS_NUM_SYMBOLS / 2) + goto invalid; + for (i = 0; i < XPRESS_NUM_SYMBOLS / 2; i++) { + d->lens[i*2 + 0] = in_begin[i] & 0xF; + d->lens[i*2 + 1] = in_begin[i] >> 4; + } + + /* Build a decoding table for the Huffman code. */ + if (make_huffman_decode_table(d->decode_table, XPRESS_NUM_SYMBOLS, + XPRESS_TABLEBITS, d->lens, + XPRESS_MAX_CODEWORD_LEN, + d->working_space)) + goto invalid; + + /* Decode the matches and literals. */ + + init_input_bitstream(&is, in_begin + XPRESS_NUM_SYMBOLS / 2, + compressed_size - XPRESS_NUM_SYMBOLS / 2); + + while (out_next != out_end) { + u32 sym; + u32 log2_offset; + u32 length; + u32 offset; + + sym = read_huffsym(&is, d->decode_table, + XPRESS_TABLEBITS, XPRESS_MAX_CODEWORD_LEN); + if (sym < 256) { + /* Literal */ + *out_next++ = sym; + } else { + /* Match */ + length = sym & 0xf; + log2_offset = (sym >> 4) & 0xf; + + bitstream_ensure_bits(&is, 16); + + offset = ((u32)1 << log2_offset) | + bitstream_pop_bits(&is, log2_offset); + + if (length == 0xf) { + length += bitstream_read_byte(&is); + if (length == 0xf + 0xff) + length = bitstream_read_u16(&is); + } + length += XPRESS_MIN_MATCH_LEN; + + if (offset > (size_t)(out_next - out_begin)) + goto invalid; + + if (length > (size_t)(out_end - out_next)) + goto invalid; + + out_next = lz_copy(out_next, length, offset, out_end, + XPRESS_MIN_MATCH_LEN); + } + } + return 0; + +invalid: + return -1; +} + +/* + * xpress_free_decompressor - Free an XPRESS decompressor + * + * @decompressor: A decompressor that was allocated with + * xpress_allocate_decompressor(), or NULL. + */ +void xpress_free_decompressor(struct xpress_decompressor *decompressor) +{ + kfree(decompressor); +} diff --git a/fs/ntfs3/lznt.c b/fs/ntfs3/lznt.c new file mode 100644 index 000000000000..ead9ab7d69b3 --- /dev/null +++ b/fs/ntfs3/lznt.c @@ -0,0 +1,452 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +// clang-format off +/* src buffer is zero */ +#define LZNT_ERROR_ALL_ZEROS 1 +#define LZNT_CHUNK_SIZE 0x1000 +// clang-format on + +struct lznt_hash { + const u8 *p1; + const u8 *p2; +}; + +struct lznt { + const u8 *unc; + const u8 *unc_end; + const u8 *best_match; + size_t max_len; + bool std; + + struct lznt_hash hash[LZNT_CHUNK_SIZE]; +}; + +static inline size_t get_match_len(const u8 *ptr, const u8 *end, const u8 *prev, + size_t max_len) +{ + size_t len = 0; + + while (ptr + len < end && ptr[len] == prev[len] && ++len < max_len) + ; + return len; +} + +static size_t longest_match_std(const u8 *src, struct lznt *ctx) +{ + size_t hash_index; + size_t len1 = 0, len2 = 0; + const u8 **hash; + + hash_index = + ((40543U * ((((src[0] << 4) ^ src[1]) << 4) ^ src[2])) >> 4) & + (LZNT_CHUNK_SIZE - 1); + + hash = &(ctx->hash[hash_index].p1); + + if (hash[0] >= ctx->unc && hash[0] < src && hash[0][0] == src[0] && + hash[0][1] == src[1] && hash[0][2] == src[2]) { + len1 = 3; + if (ctx->max_len > 3) + len1 += get_match_len(src + 3, ctx->unc_end, + hash[0] + 3, ctx->max_len - 3); + } + + if (hash[1] >= ctx->unc && hash[1] < src && hash[1][0] == src[0] && + hash[1][1] == src[1] && hash[1][2] == src[2]) { + len2 = 3; + if (ctx->max_len > 3) + len2 += get_match_len(src + 3, ctx->unc_end, + hash[1] + 3, ctx->max_len - 3); + } + + /* Compare two matches and select the best one */ + if (len1 < len2) { + ctx->best_match = hash[1]; + len1 = len2; + } else { + ctx->best_match = hash[0]; + } + + hash[1] = hash[0]; + hash[0] = src; + return len1; +} + +static size_t longest_match_best(const u8 *src, struct lznt *ctx) +{ + size_t max_len; + const u8 *ptr; + + if (ctx->unc >= src || !ctx->max_len) + return 0; + + max_len = 0; + for (ptr = ctx->unc; ptr < src; ++ptr) { + size_t len = + get_match_len(src, ctx->unc_end, ptr, ctx->max_len); + if (len >= max_len) { + max_len = len; + ctx->best_match = ptr; + } + } + + return max_len >= 3 ? max_len : 0; +} + +static const size_t s_max_len[] = { + 0x1002, 0x802, 0x402, 0x202, 0x102, 0x82, 0x42, 0x22, 0x12, +}; + +static const size_t s_max_off[] = { + 0x10, 0x20, 0x40, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, +}; + +static inline u16 make_pair(size_t offset, size_t len, size_t index) +{ + return ((offset - 1) << (12 - index)) | + ((len - 3) & (((1 << (12 - index)) - 1))); +} + +static inline size_t parse_pair(u16 pair, size_t *offset, size_t index) +{ + *offset = 1 + (pair >> (12 - index)); + return 3 + (pair & ((1 << (12 - index)) - 1)); +} + +/* + * compress_chunk + * + * returns one of the three values: + * 0 - ok, 'cmpr' contains 'cmpr_chunk_size' bytes of compressed data + * 1 - input buffer is full zero + * -2 - the compressed buffer is too small to hold the compressed data + */ +static inline int compress_chunk(size_t (*match)(const u8 *, struct lznt *), + const u8 *unc, const u8 *unc_end, u8 *cmpr, + u8 *cmpr_end, size_t *cmpr_chunk_size, + struct lznt *ctx) +{ + size_t cnt = 0; + size_t idx = 0; + const u8 *up = unc; + u8 *cp = cmpr + 3; + u8 *cp2 = cmpr + 2; + u8 not_zero = 0; + /* Control byte of 8-bit values: ( 0 - means byte as is, 1 - short pair ) */ + u8 ohdr = 0; + u8 *last; + u16 t16; + + if (unc + LZNT_CHUNK_SIZE < unc_end) + unc_end = unc + LZNT_CHUNK_SIZE; + + last = min(cmpr + LZNT_CHUNK_SIZE + sizeof(short), cmpr_end); + + ctx->unc = unc; + ctx->unc_end = unc_end; + ctx->max_len = s_max_len[0]; + + while (up < unc_end) { + size_t max_len; + + while (unc + s_max_off[idx] < up) + ctx->max_len = s_max_len[++idx]; + + // Find match + max_len = up + 3 <= unc_end ? (*match)(up, ctx) : 0; + + if (!max_len) { + if (cp >= last) + goto NotCompressed; + not_zero |= *cp++ = *up++; + } else if (cp + 1 >= last) { + goto NotCompressed; + } else { + t16 = make_pair(up - ctx->best_match, max_len, idx); + *cp++ = t16; + *cp++ = t16 >> 8; + + ohdr |= 1 << cnt; + up += max_len; + } + + cnt = (cnt + 1) & 7; + if (!cnt) { + *cp2 = ohdr; + ohdr = 0; + cp2 = cp; + cp += 1; + } + } + + if (cp2 < last) + *cp2 = ohdr; + else + cp -= 1; + + *cmpr_chunk_size = cp - cmpr; + + t16 = (*cmpr_chunk_size - 3) | 0xB000; + cmpr[0] = t16; + cmpr[1] = t16 >> 8; + + return not_zero ? 0 : LZNT_ERROR_ALL_ZEROS; + +NotCompressed: + + if ((cmpr + LZNT_CHUNK_SIZE + sizeof(short)) > last) + return -2; + + /* + * Copy non cmpr data + * 0x3FFF == ((LZNT_CHUNK_SIZE + 2 - 3) | 0x3000) + */ + cmpr[0] = 0xff; + cmpr[1] = 0x3f; + + memcpy(cmpr + sizeof(short), unc, LZNT_CHUNK_SIZE); + *cmpr_chunk_size = LZNT_CHUNK_SIZE + sizeof(short); + + return 0; +} + +static inline ssize_t decompress_chunk(u8 *unc, u8 *unc_end, const u8 *cmpr, + const u8 *cmpr_end) +{ + u8 *up = unc; + u8 ch = *cmpr++; + size_t bit = 0; + size_t index = 0; + u16 pair; + size_t offset, length; + + /* Do decompression until pointers are inside range */ + while (up < unc_end && cmpr < cmpr_end) { + /* Correct index */ + while (unc + s_max_off[index] < up) + index += 1; + + /* Check the current flag for zero */ + if (!(ch & (1 << bit))) { + /* Just copy byte */ + *up++ = *cmpr++; + goto next; + } + + /* Check for boundary */ + if (cmpr + 1 >= cmpr_end) + return -EINVAL; + + /* Read a short from little endian stream */ + pair = cmpr[1]; + pair <<= 8; + pair |= cmpr[0]; + + cmpr += 2; + + /* Translate packed information into offset and length */ + length = parse_pair(pair, &offset, index); + + /* Check offset for boundary */ + if (unc + offset > up) + return -EINVAL; + + /* Truncate the length if necessary */ + if (up + length >= unc_end) + length = unc_end - up; + + /* Now we copy bytes. This is the heart of LZ algorithm. */ + for (; length > 0; length--, up++) + *up = *(up - offset); + +next: + /* Advance flag bit value */ + bit = (bit + 1) & 7; + + if (!bit) { + if (cmpr >= cmpr_end) + break; + + ch = *cmpr++; + } + } + + /* return the size of uncompressed data */ + return up - unc; +} + +/* + * 0 - standard compression + * !0 - best compression, requires a lot of cpu + */ +struct lznt *get_lznt_ctx(int level) +{ + struct lznt *r = ntfs_zalloc(level ? offsetof(struct lznt, hash) + : sizeof(struct lznt)); + + if (r) + r->std = !level; + return r; +} + +/* + * compress_lznt + * + * Compresses "unc" into "cmpr" + * +x - ok, 'cmpr' contains 'final_compressed_size' bytes of compressed data + * 0 - input buffer is full zero + */ +size_t compress_lznt(const void *unc, size_t unc_size, void *cmpr, + size_t cmpr_size, struct lznt *ctx) +{ + int err; + size_t (*match)(const u8 *src, struct lznt *ctx); + u8 *p = cmpr; + u8 *end = p + cmpr_size; + const u8 *unc_chunk = unc; + const u8 *unc_end = unc_chunk + unc_size; + bool is_zero = true; + + if (ctx->std) { + match = &longest_match_std; + memset(ctx->hash, 0, sizeof(ctx->hash)); + } else { + match = &longest_match_best; + } + + /* compression cycle */ + for (; unc_chunk < unc_end; unc_chunk += LZNT_CHUNK_SIZE) { + cmpr_size = 0; + err = compress_chunk(match, unc_chunk, unc_end, p, end, + &cmpr_size, ctx); + if (err < 0) + return unc_size; + + if (is_zero && err != LZNT_ERROR_ALL_ZEROS) + is_zero = false; + + p += cmpr_size; + } + + if (p <= end - 2) + p[0] = p[1] = 0; + + return is_zero ? 0 : PtrOffset(cmpr, p); +} + +/* + * decompress_lznt + * + * decompresses "cmpr" into "unc" + */ +ssize_t decompress_lznt(const void *cmpr, size_t cmpr_size, void *unc, + size_t unc_size) +{ + const u8 *cmpr_chunk = cmpr; + const u8 *cmpr_end = cmpr_chunk + cmpr_size; + u8 *unc_chunk = unc; + u8 *unc_end = unc_chunk + unc_size; + u16 chunk_hdr; + + if (cmpr_size < sizeof(short)) + return -EINVAL; + + /* read chunk header */ + chunk_hdr = cmpr_chunk[1]; + chunk_hdr <<= 8; + chunk_hdr |= cmpr_chunk[0]; + + /* loop through decompressing chunks */ + for (;;) { + size_t chunk_size_saved; + size_t unc_use; + size_t cmpr_use = 3 + (chunk_hdr & (LZNT_CHUNK_SIZE - 1)); + + /* Check that the chunk actually fits the supplied buffer */ + if (cmpr_chunk + cmpr_use > cmpr_end) + return -EINVAL; + + /* First make sure the chunk contains compressed data */ + if (chunk_hdr & 0x8000) { + /* Decompress a chunk and return if we get an error */ + ssize_t err = + decompress_chunk(unc_chunk, unc_end, + cmpr_chunk + sizeof(chunk_hdr), + cmpr_chunk + cmpr_use); + if (err < 0) + return err; + unc_use = err; + } else { + /* This chunk does not contain compressed data */ + unc_use = unc_chunk + LZNT_CHUNK_SIZE > unc_end + ? unc_end - unc_chunk + : LZNT_CHUNK_SIZE; + + if (cmpr_chunk + sizeof(chunk_hdr) + unc_use > + cmpr_end) { + return -EINVAL; + } + + memcpy(unc_chunk, cmpr_chunk + sizeof(chunk_hdr), + unc_use); + } + + /* Advance pointers */ + cmpr_chunk += cmpr_use; + unc_chunk += unc_use; + + /* Check for the end of unc buffer */ + if (unc_chunk >= unc_end) + break; + + /* Proceed the next chunk */ + if (cmpr_chunk > cmpr_end - 2) + break; + + chunk_size_saved = LZNT_CHUNK_SIZE; + + /* read chunk header */ + chunk_hdr = cmpr_chunk[1]; + chunk_hdr <<= 8; + chunk_hdr |= cmpr_chunk[0]; + + if (!chunk_hdr) + break; + + /* Check the size of unc buffer */ + if (unc_use < chunk_size_saved) { + size_t t1 = chunk_size_saved - unc_use; + u8 *t2 = unc_chunk + t1; + + /* 'Zero' memory */ + if (t2 >= unc_end) + break; + + memset(unc_chunk, 0, t1); + unc_chunk = t2; + } + } + + /* Check compression boundary */ + if (cmpr_chunk > cmpr_end) + return -EINVAL; + + /* + * The unc size is just a difference between current + * pointer and original one + */ + return PtrOffset(unc, unc_chunk); +} diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c new file mode 100644 index 000000000000..66805b2d9585 --- /dev/null +++ b/fs/ntfs3/namei.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* + * fill_name_de + * + * formats NTFS_DE in 'buf' + */ +int fill_name_de(struct ntfs_sb_info *sbi, void *buf, const struct qstr *name, + const struct cpu_str *uni) +{ + int err; + struct NTFS_DE *e = buf; + u16 data_size; + struct ATTR_FILE_NAME *fname = (struct ATTR_FILE_NAME *)(e + 1); + +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + e->ref.high = fname->home.high = 0; +#endif + if (uni) { +#ifdef __BIG_ENDIAN + int ulen = uni->len; + __le16 *uname = fname->name; + const u16 *name_cpu = uni->name; + + while (ulen--) + *uname++ = cpu_to_le16(*name_cpu++); +#else + memcpy(fname->name, uni->name, uni->len * sizeof(u16)); +#endif + fname->name_len = uni->len; + + } else { + /* Convert input string to unicode */ + err = ntfs_nls_to_utf16(sbi, name->name, name->len, + (struct cpu_str *)&fname->name_len, + NTFS_NAME_LEN, UTF16_LITTLE_ENDIAN); + if (err < 0) + return err; + } + + fname->type = FILE_NAME_POSIX; + data_size = fname_full_size(fname); + + e->size = cpu_to_le16(QuadAlign(data_size) + sizeof(struct NTFS_DE)); + e->key_size = cpu_to_le16(data_size); + e->flags = 0; + e->res = 0; + + return 0; +} + +/* + * ntfs_lookup + * + * inode_operations::lookup + */ +static struct dentry *ntfs_lookup(struct inode *dir, struct dentry *dentry, + u32 flags) +{ + struct ntfs_inode *ni = ntfs_i(dir); + struct cpu_str *uni = __getname(); + struct inode *inode; + int err; + + if (!uni) + inode = ERR_PTR(-ENOMEM); + else { + err = ntfs_nls_to_utf16(ni->mi.sbi, dentry->d_name.name, + dentry->d_name.len, uni, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (err < 0) + inode = ERR_PTR(err); + else { + ni_lock(ni); + inode = dir_search_u(dir, uni, NULL); + ni_unlock(ni); + } + __putname(uni); + } + + return d_splice_alias(inode, dentry); +} + +/* + * ntfs_create + * + * inode_operations::create + */ +static int ntfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, + bool excl) +{ + struct ntfs_inode *ni = ntfs_i(dir); + struct inode *inode; + + ni_lock_dir(ni); + + inode = ntfs_create_inode(dir, dentry, NULL, S_IFREG | mode, + 0, NULL, 0, excl, NULL); + + ni_unlock(ni); + + return IS_ERR(inode) ? PTR_ERR(inode) : 0; +} + +/* + * ntfs_link + * + * inode_operations::link + */ +static int ntfs_link(struct dentry *ode, struct inode *dir, struct dentry *de) +{ + int err; + struct inode *inode = d_inode(ode); + struct ntfs_inode *ni = ntfs_i(inode); + + if (S_ISDIR(inode->i_mode)) + return -EPERM; + + if (inode->i_nlink >= NTFS_LINK_MAX) + return -EMLINK; + + ni_lock_dir(ntfs_i(dir)); + if (inode != dir) + ni_lock(ni); + + dir->i_ctime = dir->i_mtime = inode->i_ctime = current_time(inode); + inc_nlink(inode); + ihold(inode); + + err = ntfs_link_inode(inode, de); + if (!err) { + mark_inode_dirty(inode); + mark_inode_dirty(dir); + d_instantiate(de, inode); + } else { + drop_nlink(inode); + iput(inode); + } + + if (inode != dir) + ni_unlock(ni); + ni_unlock(ntfs_i(dir)); + + return err; +} + +/* + * ntfs_unlink + * + * inode_operations::unlink + */ +static int ntfs_unlink(struct inode *dir, struct dentry *dentry) +{ + struct ntfs_inode *ni = ntfs_i(dir); + int err; + + ni_lock_dir(ni); + + err = ntfs_unlink_inode(dir, dentry); + + ni_unlock(ni); + + return err; +} + +/* + * ntfs_symlink + * + * inode_operations::symlink + */ +static int ntfs_symlink(struct inode *dir, struct dentry *dentry, + const char *symname) +{ + u32 size = strlen(symname); + struct inode *inode; + struct ntfs_inode *ni = ntfs_i(dir); + + ni_lock_dir(ni); + + inode = ntfs_create_inode(dir, dentry, NULL, S_IFLNK | 0777, + 0, symname, size, 0, NULL); + + ni_unlock(ni); + + return IS_ERR(inode) ? PTR_ERR(inode) : 0; +} + +/* + * ntfs_mkdir + * + * inode_operations::mkdir + */ +static int ntfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) +{ + struct inode *inode; + struct ntfs_inode *ni = ntfs_i(dir); + + ni_lock_dir(ni); + + inode = ntfs_create_inode(dir, dentry, NULL, S_IFDIR | mode, + 0, NULL, -1, 0, NULL); + + ni_unlock(ni); + + return IS_ERR(inode) ? PTR_ERR(inode) : 0; +} + +/* + * ntfs_rmdir + * + * inode_operations::rm_dir + */ +static int ntfs_rmdir(struct inode *dir, struct dentry *dentry) +{ + struct ntfs_inode *ni = ntfs_i(dir); + int err; + + ni_lock_dir(ni); + + err = ntfs_unlink_inode(dir, dentry); + + ni_unlock(ni); + + return err; +} + +/* + * ntfs_rename + * + * inode_operations::rename + */ +static int ntfs_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + u32 flags) +{ + int err; + struct super_block *sb = old_dir->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *old_dir_ni = ntfs_i(old_dir); + struct ntfs_inode *new_dir_ni = ntfs_i(new_dir); + struct ntfs_inode *old_ni; + struct ATTR_FILE_NAME *old_name, *new_name, *fname; + u8 name_type; + bool is_same; + struct inode *old_inode, *new_inode; + struct NTFS_DE *old_de, *new_de; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + u16 new_de_key_size; + + static_assert(SIZEOF_ATTRIBUTE_FILENAME_MAX + SIZEOF_RESIDENT < 1024); + static_assert(SIZEOF_ATTRIBUTE_FILENAME_MAX + sizeof(struct NTFS_DE) < + 1024); + static_assert(PATH_MAX >= 4 * 1024); + + if (flags & ~RENAME_NOREPLACE) + return -EINVAL; + + old_inode = d_inode(old_dentry); + new_inode = d_inode(new_dentry); + + old_ni = ntfs_i(old_inode); + + is_same = old_dentry->d_name.len == new_dentry->d_name.len && + !memcmp(old_dentry->d_name.name, new_dentry->d_name.name, + old_dentry->d_name.len); + + if (is_same && old_dir == new_dir) { + /* Nothing to do */ + err = 0; + goto out; + } + + if (ntfs_is_meta_file(sbi, old_inode->i_ino)) { + err = -EINVAL; + goto out; + } + + if (new_inode) { + /*target name exists. unlink it*/ + dget(new_dentry); + ni_lock_dir(new_dir_ni); + err = ntfs_unlink_inode(new_dir, new_dentry); + ni_unlock(new_dir_ni); + dput(new_dentry); + if (err) + goto out; + } + + /* allocate PATH_MAX bytes */ + old_de = __getname(); + if (!old_de) { + err = -ENOMEM; + goto out; + } + + err = fill_name_de(sbi, old_de, &old_dentry->d_name, NULL); + if (err < 0) + goto out1; + + old_name = (struct ATTR_FILE_NAME *)(old_de + 1); + + if (is_same) { + new_de = old_de; + } else { + new_de = Add2Ptr(old_de, 1024); + err = fill_name_de(sbi, new_de, &new_dentry->d_name, NULL); + if (err < 0) + goto out1; + } + + ni_lock_dir(old_dir_ni); + ni_lock(old_ni); + + mi_get_ref(&old_dir_ni->mi, &old_name->home); + + /*get pointer to file_name in mft*/ + fname = ni_fname_name(old_ni, (struct cpu_str *)&old_name->name_len, + &old_name->home, &le); + if (!fname) { + err = -EINVAL; + goto out2; + } + + /* Copy fname info from record into new fname */ + new_name = (struct ATTR_FILE_NAME *)(new_de + 1); + memcpy(&new_name->dup, &fname->dup, sizeof(fname->dup)); + + name_type = paired_name(fname->type); + + /* remove first name from directory */ + err = indx_delete_entry(&old_dir_ni->dir, old_dir_ni, old_de + 1, + le16_to_cpu(old_de->key_size), sbi); + if (err) + goto out3; + + /* remove first name from mft */ + err = ni_remove_attr_le(old_ni, attr_from_name(fname), le); + if (err) + goto out4; + + le16_add_cpu(&old_ni->mi.mrec->hard_links, -1); + old_ni->mi.dirty = true; + + if (name_type != FILE_NAME_POSIX) { + /* get paired name */ + fname = ni_fname_type(old_ni, name_type, &le); + if (fname) { + /* remove second name from directory */ + err = indx_delete_entry(&old_dir_ni->dir, old_dir_ni, + fname, fname_full_size(fname), + sbi); + if (err) + goto out5; + + /* remove second name from mft */ + err = ni_remove_attr_le(old_ni, attr_from_name(fname), + le); + if (err) + goto out6; + + le16_add_cpu(&old_ni->mi.mrec->hard_links, -1); + old_ni->mi.dirty = true; + } + } + + /* Add new name */ + mi_get_ref(&old_ni->mi, &new_de->ref); + mi_get_ref(&ntfs_i(new_dir)->mi, &new_name->home); + + new_de_key_size = le16_to_cpu(new_de->key_size); + + /* insert new name in mft */ + err = ni_insert_resident(old_ni, new_de_key_size, ATTR_NAME, NULL, 0, + &attr, NULL); + if (err) + goto out7; + + attr->res.flags = RESIDENT_FLAG_INDEXED; + + memcpy(Add2Ptr(attr, SIZEOF_RESIDENT), new_name, new_de_key_size); + + le16_add_cpu(&old_ni->mi.mrec->hard_links, 1); + old_ni->mi.dirty = true; + + /* insert new name in directory */ + err = indx_insert_entry(&new_dir_ni->dir, new_dir_ni, new_de, sbi, + NULL); + if (err) + goto out8; + + if (IS_DIRSYNC(new_dir)) + err = ntfs_sync_inode(old_inode); + else + mark_inode_dirty(old_inode); + + old_dir->i_ctime = old_dir->i_mtime = current_time(old_dir); + if (IS_DIRSYNC(old_dir)) + (void)ntfs_sync_inode(old_dir); + else + mark_inode_dirty(old_dir); + + if (old_dir != new_dir) { + new_dir->i_mtime = new_dir->i_ctime = old_dir->i_ctime; + mark_inode_dirty(new_dir); + } + + if (old_inode) { + old_inode->i_ctime = old_dir->i_ctime; + mark_inode_dirty(old_inode); + } + + err = 0; + /* normal way */ + goto out2; + +out8: + /* undo + * ni_insert_resident(old_ni, new_de_key_size, ATTR_NAME, NULL, 0, + * &attr, NULL); + */ + mi_remove_attr(&old_ni->mi, attr); +out7: + /* undo + * ni_remove_attr_le(old_ni, attr_from_name(fname), le); + */ +out6: + /* undo + * indx_delete_entry(&old_dir_ni->dir, old_dir_ni, + * fname, fname_full_size(fname), + * sbi); + */ +out5: + /* undo + * ni_remove_attr_le(old_ni, attr_from_name(fname), le); + */ +out4: + /* undo: + * indx_delete_entry(&old_dir_ni->dir, old_dir_ni, old_de + 1, + * old_de->key_size, NULL); + */ +out3: +out2: + ni_unlock(old_ni); + ni_unlock(old_dir_ni); +out1: + __putname(old_de); +out: + return err; +} + +/* + * ntfs_atomic_open + * + * inode_operations::atomic_open + */ +static int ntfs_atomic_open(struct inode *dir, struct dentry *dentry, + struct file *file, u32 flags, umode_t mode) +{ + int err; + bool excl = !!(flags & O_EXCL); + struct inode *inode; + struct ntfs_fnd *fnd = NULL; + struct ntfs_inode *ni = ntfs_i(dir); + struct dentry *d = NULL; + struct cpu_str *uni = __getname(); + + if (!uni) + return -ENOMEM; + + err = ntfs_nls_to_utf16(ni->mi.sbi, dentry->d_name.name, + dentry->d_name.len, uni, NTFS_NAME_LEN, + UTF16_HOST_ENDIAN); + if (err < 0) + goto out; + + ni_lock_dir(ni); + + if (d_in_lookup(dentry)) { + fnd = fnd_get(); + if (!fnd) { + err = -ENOMEM; + goto out1; + } + + d = d_splice_alias(dir_search_u(dir, uni, fnd), dentry); + if (IS_ERR(d)) { + err = PTR_ERR(d); + d = NULL; + goto out2; + } + + if (d) + dentry = d; + } + + if (!(flags & O_CREAT) || d_really_is_positive(dentry)) { + err = finish_no_open(file, d); + goto out2; + } + + file->f_mode |= FMODE_CREATED; + + /*fnd contains tree's path to insert to*/ + inode = ntfs_create_inode(dir, dentry, uni, mode, 0, + NULL, 0, excl, fnd); + err = IS_ERR(inode) ? PTR_ERR(inode) + : finish_open(file, dentry, ntfs_file_open); + dput(d); + +out2: + fnd_put(fnd); +out1: + ni_unlock(ni); +out: + __putname(uni); + + return err; +} + +struct dentry *ntfs3_get_parent(struct dentry *child) +{ + struct inode *inode = d_inode(child); + struct ntfs_inode *ni = ntfs_i(inode); + + struct ATTR_LIST_ENTRY *le = NULL; + struct ATTRIB *attr = NULL; + struct ATTR_FILE_NAME *fname; + + while ((attr = ni_find_attr(ni, attr, &le, ATTR_NAME, NULL, 0, NULL, + NULL))) { + fname = resident_data_ex(attr, SIZEOF_ATTRIBUTE_FILENAME); + if (!fname) + continue; + + return d_obtain_alias( + ntfs_iget5(inode->i_sb, &fname->home, NULL)); + } + + return ERR_PTR(-ENOENT); +} + +const struct inode_operations ntfs_dir_inode_operations = { + .lookup = ntfs_lookup, + .create = ntfs_create, + .link = ntfs_link, + .unlink = ntfs_unlink, + .symlink = ntfs_symlink, + .mkdir = ntfs_mkdir, + .rmdir = ntfs_rmdir, + .rename = ntfs_rename, + .permission = ntfs_permission, + .get_acl = ntfs_get_acl, + .set_acl = ntfs_set_acl, + .setattr = ntfs3_setattr, + .getattr = ntfs_getattr, + .listxattr = ntfs_listxattr, + .atomic_open = ntfs_atomic_open, + .fiemap = ntfs_fiemap, +}; diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h new file mode 100644 index 000000000000..d3d3d93a1f06 --- /dev/null +++ b/fs/ntfs3/ntfs.h @@ -0,0 +1,1237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * on-disk ntfs structs + */ + +// clang-format off + +/* TODO: + * - Check 4K mft record and 512 bytes cluster + */ + +/* + * Activate this define to use binary search in indexes + */ +#define NTFS3_INDEX_BINARY_SEARCH + +/* + * Check each run for marked clusters + */ +#define NTFS3_CHECK_FREE_CLST + +#define NTFS_NAME_LEN 255 + +/* + * ntfs.sys used 500 maximum links + * on-disk struct allows up to 0xffff + */ +#define NTFS_LINK_MAX 0x400 +//#define NTFS_LINK_MAX 0xffff + +/* + * Activate to use 64 bit clusters instead of 32 bits in ntfs.sys + * Logical and virtual cluster number + * If needed, may be redefined to use 64 bit value + */ +//#define CONFIG_NTFS3_64BIT_CLUSTER + +#define NTFS_LZNT_MAX_CLUSTER 4096 +#define NTFS_LZNT_CUNIT 4 +#define NTFS_LZNT_CLUSTERS (1u<low) | ((u64)le16_to_cpu(ref->high) << 32); +#else + return le32_to_cpu(ref->low); +#endif +} + +struct NTFS_BOOT { + u8 jump_code[3]; // 0x00: Jump to boot code + u8 system_id[8]; // 0x03: System ID, equals "NTFS " + + // NOTE: this member is not aligned(!) + // bytes_per_sector[0] must be 0 + // bytes_per_sector[1] must be multiplied by 256 + u8 bytes_per_sector[2]; // 0x0B: Bytes per sector + + u8 sectors_per_clusters;// 0x0D: Sectors per cluster + u8 unused1[7]; + u8 media_type; // 0x15: Media type (0xF8 - harddisk) + u8 unused2[2]; + __le16 sct_per_track; // 0x18: number of sectors per track + __le16 heads; // 0x1A: number of heads per cylinder + __le32 hidden_sectors; // 0x1C: number of 'hidden' sectors + u8 unused3[4]; + u8 bios_drive_num; // 0x24: BIOS drive number =0x80 + u8 unused4; + u8 signature_ex; // 0x26: Extended BOOT signature =0x80 + u8 unused5; + __le64 sectors_per_volume;// 0x28: size of volume in sectors + __le64 mft_clst; // 0x30: first cluster of $MFT + __le64 mft2_clst; // 0x38: first cluster of $MFTMirr + s8 record_size; // 0x40: size of MFT record in clusters(sectors) + u8 unused6[3]; + s8 index_size; // 0x44: size of INDX record in clusters(sectors) + u8 unused7[3]; + __le64 serial_num; // 0x48: Volume serial number + __le32 check_sum; // 0x50: Simple additive checksum of all + // of the u32's which precede the 'check_sum' + + u8 boot_code[0x200 - 0x50 - 2 - 4]; // 0x54: + u8 boot_magic[2]; // 0x1FE: Boot signature =0x55 + 0xAA +}; + +static_assert(sizeof(struct NTFS_BOOT) == 0x200); + +enum NTFS_SIGNATURE { + NTFS_FILE_SIGNATURE = cpu_to_le32(0x454C4946), // 'FILE' + NTFS_INDX_SIGNATURE = cpu_to_le32(0x58444E49), // 'INDX' + NTFS_CHKD_SIGNATURE = cpu_to_le32(0x444B4843), // 'CHKD' + NTFS_RSTR_SIGNATURE = cpu_to_le32(0x52545352), // 'RSTR' + NTFS_RCRD_SIGNATURE = cpu_to_le32(0x44524352), // 'RCRD' + NTFS_BAAD_SIGNATURE = cpu_to_le32(0x44414142), // 'BAAD' + NTFS_HOLE_SIGNATURE = cpu_to_le32(0x454C4F48), // 'HOLE' + NTFS_FFFF_SIGNATURE = cpu_to_le32(0xffffffff), +}; + +static_assert(sizeof(enum NTFS_SIGNATURE) == 4); + +/* MFT Record header structure */ +struct NTFS_RECORD_HEADER { + /* Record magic number, equals 'FILE'/'INDX'/'RSTR'/'RCRD' */ + enum NTFS_SIGNATURE sign; // 0x00: + __le16 fix_off; // 0x04: + __le16 fix_num; // 0x06: + __le64 lsn; // 0x08: Log file sequence number +}; + +static_assert(sizeof(struct NTFS_RECORD_HEADER) == 0x10); + +static inline int is_baad(const struct NTFS_RECORD_HEADER *hdr) +{ + return hdr->sign == NTFS_BAAD_SIGNATURE; +} + +/* Possible bits in struct MFT_REC.flags */ +enum RECORD_FLAG { + RECORD_FLAG_IN_USE = cpu_to_le16(0x0001), + RECORD_FLAG_DIR = cpu_to_le16(0x0002), + RECORD_FLAG_SYSTEM = cpu_to_le16(0x0004), + RECORD_FLAG_UNKNOWN = cpu_to_le16(0x0008), +}; + +/* MFT Record structure */ +struct MFT_REC { + struct NTFS_RECORD_HEADER rhdr; // 'FILE' + + __le16 seq; // 0x10: Sequence number for this record + __le16 hard_links; // 0x12: The number of hard links to record + __le16 attr_off; // 0x14: Offset to attributes + __le16 flags; // 0x16: See RECORD_FLAG + __le32 used; // 0x18: The size of used part + __le32 total; // 0x1C: Total record size + + struct MFT_REF parent_ref; // 0x20: Parent MFT record + __le16 next_attr_id; // 0x28: The next attribute Id + + __le16 res; // 0x2A: High part of mft record? + __le32 mft_record; // 0x2C: Current mft record number + __le16 fixups[1]; // 0x30: +}; + +#define MFTRECORD_FIXUP_OFFSET_1 offsetof(struct MFT_REC, res) +#define MFTRECORD_FIXUP_OFFSET_3 offsetof(struct MFT_REC, fixups) + +static_assert(MFTRECORD_FIXUP_OFFSET_1 == 0x2A); +static_assert(MFTRECORD_FIXUP_OFFSET_3 == 0x30); + +static inline bool is_rec_base(const struct MFT_REC *rec) +{ + const struct MFT_REF *r = &rec->parent_ref; + + return !r->low && !r->high && !r->seq; +} + +static inline bool is_mft_rec5(const struct MFT_REC *rec) +{ + return le16_to_cpu(rec->rhdr.fix_off) >= + offsetof(struct MFT_REC, fixups); +} + +static inline bool is_rec_inuse(const struct MFT_REC *rec) +{ + return rec->flags & RECORD_FLAG_IN_USE; +} + +static inline bool clear_rec_inuse(struct MFT_REC *rec) +{ + return rec->flags &= ~RECORD_FLAG_IN_USE; +} + +/* Possible values of ATTR_RESIDENT.flags */ +#define RESIDENT_FLAG_INDEXED 0x01 + +struct ATTR_RESIDENT { + __le32 data_size; // 0x10: The size of data + __le16 data_off; // 0x14: Offset to data + u8 flags; // 0x16: resident flags ( 1 - indexed ) + u8 res; // 0x17: +}; // sizeof() = 0x18 + +struct ATTR_NONRESIDENT { + __le64 svcn; // 0x10: Starting VCN of this segment + __le64 evcn; // 0x18: End VCN of this segment + __le16 run_off; // 0x20: Offset to packed runs + // Unit of Compression size for this stream, expressed + // as a log of the cluster size. + // + // 0 means file is not compressed + // 1, 2, 3, and 4 are potentially legal values if the + // stream is compressed, however the implementation + // may only choose to use 4, or possibly 3. Note + // that 4 means cluster size time 16. If convenient + // the implementation may wish to accept a + // reasonable range of legal values here (1-5?), + // even if the implementation only generates + // a smaller set of values itself. + u8 c_unit; // 0x22 + u8 res1[5]; // 0x23: + __le64 alloc_size; // 0x28: The allocated size of attribute in bytes + // (multiple of cluster size) + __le64 data_size; // 0x30: The size of attribute in bytes <= alloc_size + __le64 valid_size; // 0x38: The size of valid part in bytes <= data_size + __le64 total_size; // 0x40: The sum of the allocated clusters for a file + // (present only for the first segment (0 == vcn) + // of compressed attribute) + +}; // sizeof()=0x40 or 0x48 (if compressed) + +/* Possible values of ATTRIB.flags: */ +#define ATTR_FLAG_COMPRESSED cpu_to_le16(0x0001) +#define ATTR_FLAG_COMPRESSED_MASK cpu_to_le16(0x00FF) +#define ATTR_FLAG_ENCRYPTED cpu_to_le16(0x4000) +#define ATTR_FLAG_SPARSED cpu_to_le16(0x8000) + +struct ATTRIB { + enum ATTR_TYPE type; // 0x00: The type of this attribute + __le32 size; // 0x04: The size of this attribute + u8 non_res; // 0x08: Is this attribute non-resident ? + u8 name_len; // 0x09: This attribute name length + __le16 name_off; // 0x0A: Offset to the attribute name + __le16 flags; // 0x0C: See ATTR_FLAG_XXX + __le16 id; // 0x0E: unique id (per record) + + union { + struct ATTR_RESIDENT res; // 0x10 + struct ATTR_NONRESIDENT nres; // 0x10 + }; +}; + +/* Define attribute sizes */ +#define SIZEOF_RESIDENT 0x18 +#define SIZEOF_NONRESIDENT_EX 0x48 +#define SIZEOF_NONRESIDENT 0x40 + +#define SIZEOF_RESIDENT_LE cpu_to_le16(0x18) +#define SIZEOF_NONRESIDENT_EX_LE cpu_to_le16(0x48) +#define SIZEOF_NONRESIDENT_LE cpu_to_le16(0x40) + +static inline u64 attr_ondisk_size(const struct ATTRIB *attr) +{ + return attr->non_res ? ((attr->flags & + (ATTR_FLAG_COMPRESSED | ATTR_FLAG_SPARSED)) ? + le64_to_cpu(attr->nres.total_size) : + le64_to_cpu(attr->nres.alloc_size)) : + QuadAlign(le32_to_cpu(attr->res.data_size)); +} + +static inline u64 attr_size(const struct ATTRIB *attr) +{ + return attr->non_res ? le64_to_cpu(attr->nres.data_size) : + le32_to_cpu(attr->res.data_size); +} + +static inline bool is_attr_encrypted(const struct ATTRIB *attr) +{ + return attr->flags & ATTR_FLAG_ENCRYPTED; +} + +static inline bool is_attr_sparsed(const struct ATTRIB *attr) +{ + return attr->flags & ATTR_FLAG_SPARSED; +} + +static inline bool is_attr_compressed(const struct ATTRIB *attr) +{ + return attr->flags & ATTR_FLAG_COMPRESSED; +} + +static inline bool is_attr_ext(const struct ATTRIB *attr) +{ + return attr->flags & (ATTR_FLAG_SPARSED | ATTR_FLAG_COMPRESSED); +} + +static inline bool is_attr_indexed(const struct ATTRIB *attr) +{ + return !attr->non_res && (attr->res.flags & RESIDENT_FLAG_INDEXED); +} + +static inline __le16 const *attr_name(const struct ATTRIB *attr) +{ + return Add2Ptr(attr, le16_to_cpu(attr->name_off)); +} + +static inline u64 attr_svcn(const struct ATTRIB *attr) +{ + return attr->non_res ? le64_to_cpu(attr->nres.svcn) : 0; +} + +/* the size of resident attribute by its resident size */ +#define BYTES_PER_RESIDENT(b) (0x18 + (b)) + +static_assert(sizeof(struct ATTRIB) == 0x48); +static_assert(sizeof(((struct ATTRIB *)NULL)->res) == 0x08); +static_assert(sizeof(((struct ATTRIB *)NULL)->nres) == 0x38); + +static inline void *resident_data_ex(const struct ATTRIB *attr, u32 datasize) +{ + u32 asize, rsize; + u16 off; + + if (attr->non_res) + return NULL; + + asize = le32_to_cpu(attr->size); + off = le16_to_cpu(attr->res.data_off); + + if (asize < datasize + off) + return NULL; + + rsize = le32_to_cpu(attr->res.data_size); + if (rsize < datasize) + return NULL; + + return Add2Ptr(attr, off); +} + +static inline void *resident_data(const struct ATTRIB *attr) +{ + return Add2Ptr(attr, le16_to_cpu(attr->res.data_off)); +} + +static inline void *attr_run(const struct ATTRIB *attr) +{ + return Add2Ptr(attr, le16_to_cpu(attr->nres.run_off)); +} + +/* Standard information attribute (0x10) */ +struct ATTR_STD_INFO { + __le64 cr_time; // 0x00: File creation file + __le64 m_time; // 0x08: File modification time + __le64 c_time; // 0x10: Last time any attribute was modified + __le64 a_time; // 0x18: File last access time + enum FILE_ATTRIBUTE fa; // 0x20: Standard DOS attributes & more + __le32 max_ver_num; // 0x24: Maximum Number of Versions + __le32 ver_num; // 0x28: Version Number + __le32 class_id; // 0x2C: Class Id from bidirectional Class Id index +}; + +static_assert(sizeof(struct ATTR_STD_INFO) == 0x30); + +#define SECURITY_ID_INVALID 0x00000000 +#define SECURITY_ID_FIRST 0x00000100 + +struct ATTR_STD_INFO5 { + __le64 cr_time; // 0x00: File creation file + __le64 m_time; // 0x08: File modification time + __le64 c_time; // 0x10: Last time any attribute was modified + __le64 a_time; // 0x18: File last access time + enum FILE_ATTRIBUTE fa; // 0x20: Standard DOS attributes & more + __le32 max_ver_num; // 0x24: Maximum Number of Versions + __le32 ver_num; // 0x28: Version Number + __le32 class_id; // 0x2C: Class Id from bidirectional Class Id index + + __le32 owner_id; // 0x30: Owner Id of the user owning the file. + __le32 security_id; // 0x34: The Security Id is a key in the $SII Index and $SDS + __le64 quota_charge; // 0x38: + __le64 usn; // 0x40: Last Update Sequence Number of the file. This is a direct + // index into the file $UsnJrnl. If zero, the USN Journal is + // disabled. +}; + +static_assert(sizeof(struct ATTR_STD_INFO5) == 0x48); + +/* attribute list entry structure (0x20) */ +struct ATTR_LIST_ENTRY { + enum ATTR_TYPE type; // 0x00: The type of attribute + __le16 size; // 0x04: The size of this record + u8 name_len; // 0x06: The length of attribute name + u8 name_off; // 0x07: The offset to attribute name + __le64 vcn; // 0x08: Starting VCN of this attribute + struct MFT_REF ref; // 0x10: MFT record number with attribute + __le16 id; // 0x18: struct ATTRIB ID + __le16 name[3]; // 0x1A: Just to align. To get real name can use bNameOffset + +}; // sizeof(0x20) + +static_assert(sizeof(struct ATTR_LIST_ENTRY) == 0x20); + +static inline u32 le_size(u8 name_len) +{ + return QuadAlign(offsetof(struct ATTR_LIST_ENTRY, name) + + name_len * sizeof(short)); +} + +/* returns 0 if 'attr' has the same type and name */ +static inline int le_cmp(const struct ATTR_LIST_ENTRY *le, + const struct ATTRIB *attr) +{ + return le->type != attr->type || le->name_len != attr->name_len || + (!le->name_len && + memcmp(Add2Ptr(le, le->name_off), + Add2Ptr(attr, le16_to_cpu(attr->name_off)), + le->name_len * sizeof(short))); +} + +static inline __le16 const *le_name(const struct ATTR_LIST_ENTRY *le) +{ + return Add2Ptr(le, le->name_off); +} + +/* File name types (the field type in struct ATTR_FILE_NAME ) */ +#define FILE_NAME_POSIX 0 +#define FILE_NAME_UNICODE 1 +#define FILE_NAME_DOS 2 +#define FILE_NAME_UNICODE_AND_DOS (FILE_NAME_DOS | FILE_NAME_UNICODE) + +/* Filename attribute structure (0x30) */ +struct NTFS_DUP_INFO { + __le64 cr_time; // 0x00: File creation file + __le64 m_time; // 0x08: File modification time + __le64 c_time; // 0x10: Last time any attribute was modified + __le64 a_time; // 0x18: File last access time + __le64 alloc_size; // 0x20: Data attribute allocated size, multiple of cluster size + __le64 data_size; // 0x28: Data attribute size <= Dataalloc_size + enum FILE_ATTRIBUTE fa; // 0x30: Standard DOS attributes & more + __le16 ea_size; // 0x34: Packed EAs + __le16 reparse; // 0x36: Used by Reparse + +}; // 0x38 + +struct ATTR_FILE_NAME { + struct MFT_REF home; // 0x00: MFT record for directory + struct NTFS_DUP_INFO dup;// 0x08 + u8 name_len; // 0x40: File name length in words + u8 type; // 0x41: File name type + __le16 name[1]; // 0x42: File name +}; + +static_assert(sizeof(((struct ATTR_FILE_NAME *)NULL)->dup) == 0x38); +static_assert(offsetof(struct ATTR_FILE_NAME, name) == 0x42); +#define SIZEOF_ATTRIBUTE_FILENAME 0x44 +#define SIZEOF_ATTRIBUTE_FILENAME_MAX (0x42 + 255 * 2) + +static inline struct ATTRIB *attr_from_name(struct ATTR_FILE_NAME *fname) +{ + return (struct ATTRIB *)((char *)fname - SIZEOF_RESIDENT); +} + +static inline u16 fname_full_size(const struct ATTR_FILE_NAME *fname) +{ + return offsetof(struct ATTR_FILE_NAME, name) + + fname->name_len * sizeof(short); +} + +static inline u8 paired_name(u8 type) +{ + if (type == FILE_NAME_UNICODE) + return FILE_NAME_DOS; + if (type == FILE_NAME_DOS) + return FILE_NAME_UNICODE; + return FILE_NAME_POSIX; +} + +/* Index entry defines ( the field flags in NtfsDirEntry ) */ +#define NTFS_IE_HAS_SUBNODES cpu_to_le16(1) +#define NTFS_IE_LAST cpu_to_le16(2) + +/* Directory entry structure */ +struct NTFS_DE { + union { + struct MFT_REF ref; // 0x00: MFT record number with this file + struct { + __le16 data_off; // 0x00: + __le16 data_size; // 0x02: + __le32 res; // 0x04: must be 0 + } view; + }; + __le16 size; // 0x08: The size of this entry + __le16 key_size; // 0x0A: The size of File name length in bytes + 0x42 + __le16 flags; // 0x0C: Entry flags: NTFS_IE_XXX + __le16 res; // 0x0E: + + // Here any indexed attribute can be placed + // One of them is: + // struct ATTR_FILE_NAME AttrFileName; + // + + // The last 8 bytes of this structure contains + // the VBN of subnode + // !!! Note !!! + // This field is presented only if (flags & NTFS_IE_HAS_SUBNODES) + // __le64 vbn; +}; + +static_assert(sizeof(struct NTFS_DE) == 0x10); + +static inline void de_set_vbn_le(struct NTFS_DE *e, __le64 vcn) +{ + __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); + + *v = vcn; +} + +static inline void de_set_vbn(struct NTFS_DE *e, CLST vcn) +{ + __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); + + *v = cpu_to_le64(vcn); +} + +static inline __le64 de_get_vbn_le(const struct NTFS_DE *e) +{ + return *(__le64 *)Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); +} + +static inline CLST de_get_vbn(const struct NTFS_DE *e) +{ + __le64 *v = Add2Ptr(e, le16_to_cpu(e->size) - sizeof(__le64)); + + return le64_to_cpu(*v); +} + +static inline struct NTFS_DE *de_get_next(const struct NTFS_DE *e) +{ + return Add2Ptr(e, le16_to_cpu(e->size)); +} + +static inline struct ATTR_FILE_NAME *de_get_fname(const struct NTFS_DE *e) +{ + return le16_to_cpu(e->key_size) >= SIZEOF_ATTRIBUTE_FILENAME ? + Add2Ptr(e, sizeof(struct NTFS_DE)) : + NULL; +} + +static inline bool de_is_last(const struct NTFS_DE *e) +{ + return e->flags & NTFS_IE_LAST; +} + +static inline bool de_has_vcn(const struct NTFS_DE *e) +{ + return e->flags & NTFS_IE_HAS_SUBNODES; +} + +static inline bool de_has_vcn_ex(const struct NTFS_DE *e) +{ + return (e->flags & NTFS_IE_HAS_SUBNODES) && + (u64)(-1) != *((u64 *)Add2Ptr(e, le16_to_cpu(e->size) - + sizeof(__le64))); +} + +#define MAX_BYTES_PER_NAME_ENTRY \ + QuadAlign(sizeof(struct NTFS_DE) + \ + offsetof(struct ATTR_FILE_NAME, name) + \ + NTFS_NAME_LEN * sizeof(short)) + +struct INDEX_HDR { + __le32 de_off; // 0x00: The offset from the start of this structure + // to the first NTFS_DE + __le32 used; // 0x04: The size of this structure plus all + // entries (quad-word aligned) + __le32 total; // 0x08: The allocated size of for this structure plus all entries + u8 flags; // 0x0C: 0x00 = Small directory, 0x01 = Large directory + u8 res[3]; + + // + // de_off + used <= total + // +}; + +static_assert(sizeof(struct INDEX_HDR) == 0x10); + +static inline struct NTFS_DE *hdr_first_de(const struct INDEX_HDR *hdr) +{ + u32 de_off = le32_to_cpu(hdr->de_off); + u32 used = le32_to_cpu(hdr->used); + struct NTFS_DE *e = Add2Ptr(hdr, de_off); + u16 esize; + + if (de_off >= used || de_off >= le32_to_cpu(hdr->total)) + return NULL; + + esize = le16_to_cpu(e->size); + if (esize < sizeof(struct NTFS_DE) || de_off + esize > used) + return NULL; + + return e; +} + +static inline struct NTFS_DE *hdr_next_de(const struct INDEX_HDR *hdr, + const struct NTFS_DE *e) +{ + size_t off = PtrOffset(hdr, e); + u32 used = le32_to_cpu(hdr->used); + u16 esize; + + if (off >= used) + return NULL; + + esize = le16_to_cpu(e->size); + + if (esize < sizeof(struct NTFS_DE) || + off + esize + sizeof(struct NTFS_DE) > used) + return NULL; + + return Add2Ptr(e, esize); +} + +static inline bool hdr_has_subnode(const struct INDEX_HDR *hdr) +{ + return hdr->flags & 1; +} + +struct INDEX_BUFFER { + struct NTFS_RECORD_HEADER rhdr; // 'INDX' + __le64 vbn; // 0x10: vcn if index >= cluster or vsn id index < cluster + struct INDEX_HDR ihdr; // 0x18: +}; + +static_assert(sizeof(struct INDEX_BUFFER) == 0x28); + +static inline bool ib_is_empty(const struct INDEX_BUFFER *ib) +{ + const struct NTFS_DE *first = hdr_first_de(&ib->ihdr); + + return !first || de_is_last(first); +} + +static inline bool ib_is_leaf(const struct INDEX_BUFFER *ib) +{ + return !(ib->ihdr.flags & 1); +} + +/* Index root structure ( 0x90 ) */ +enum COLLATION_RULE { + NTFS_COLLATION_TYPE_BINARY = cpu_to_le32(0), + // $I30 + NTFS_COLLATION_TYPE_FILENAME = cpu_to_le32(0x01), + // $SII of $Secure and $Q of Quota + NTFS_COLLATION_TYPE_UINT = cpu_to_le32(0x10), + // $O of Quota + NTFS_COLLATION_TYPE_SID = cpu_to_le32(0x11), + // $SDH of $Secure + NTFS_COLLATION_TYPE_SECURITY_HASH = cpu_to_le32(0x12), + // $O of ObjId and "$R" for Reparse + NTFS_COLLATION_TYPE_UINTS = cpu_to_le32(0x13) +}; + +static_assert(sizeof(enum COLLATION_RULE) == 4); + +// +struct INDEX_ROOT { + enum ATTR_TYPE type; // 0x00: The type of attribute to index on + enum COLLATION_RULE rule; // 0x04: The rule + __le32 index_block_size;// 0x08: The size of index record + u8 index_block_clst; // 0x0C: The number of clusters or sectors per index + u8 res[3]; + struct INDEX_HDR ihdr; // 0x10: +}; + +static_assert(sizeof(struct INDEX_ROOT) == 0x20); +static_assert(offsetof(struct INDEX_ROOT, ihdr) == 0x10); + +#define VOLUME_FLAG_DIRTY cpu_to_le16(0x0001) +#define VOLUME_FLAG_RESIZE_LOG_FILE cpu_to_le16(0x0002) + +struct VOLUME_INFO { + __le64 res1; // 0x00 + u8 major_ver; // 0x08: NTFS major version number (before .) + u8 minor_ver; // 0x09: NTFS minor version number (after .) + __le16 flags; // 0x0A: Volume flags, see VOLUME_FLAG_XXX + +}; // sizeof=0xC + +#define SIZEOF_ATTRIBUTE_VOLUME_INFO 0xc + +#define NTFS_LABEL_MAX_LENGTH (0x100 / sizeof(short)) +#define NTFS_ATTR_INDEXABLE cpu_to_le32(0x00000002) +#define NTFS_ATTR_DUPALLOWED cpu_to_le32(0x00000004) +#define NTFS_ATTR_MUST_BE_INDEXED cpu_to_le32(0x00000010) +#define NTFS_ATTR_MUST_BE_NAMED cpu_to_le32(0x00000020) +#define NTFS_ATTR_MUST_BE_RESIDENT cpu_to_le32(0x00000040) +#define NTFS_ATTR_LOG_ALWAYS cpu_to_le32(0x00000080) + +/* $AttrDef file entry */ +struct ATTR_DEF_ENTRY { + __le16 name[0x40]; // 0x00: Attr name + enum ATTR_TYPE type; // 0x80: struct ATTRIB type + __le32 res; // 0x84: + enum COLLATION_RULE rule; // 0x88: + __le32 flags; // 0x8C: NTFS_ATTR_XXX (see above) + __le64 min_sz; // 0x90: Minimum attribute data size + __le64 max_sz; // 0x98: Maximum attribute data size +}; + +static_assert(sizeof(struct ATTR_DEF_ENTRY) == 0xa0); + +/* Object ID (0x40) */ +struct OBJECT_ID { + struct GUID ObjId; // 0x00: Unique Id assigned to file + struct GUID BirthVolumeId;// 0x10: Birth Volume Id is the Object Id of the Volume on + // which the Object Id was allocated. It never changes + struct GUID BirthObjectId; // 0x20: Birth Object Id is the first Object Id that was + // ever assigned to this MFT Record. I.e. If the Object Id + // is changed for some reason, this field will reflect the + // original value of the Object Id. + struct GUID DomainId; // 0x30: Domain Id is currently unused but it is intended to be + // used in a network environment where the local machine is + // part of a Windows 2000 Domain. This may be used in a Windows + // 2000 Advanced Server managed domain. +}; + +static_assert(sizeof(struct OBJECT_ID) == 0x40); + +/* O Directory entry structure ( rule = 0x13 ) */ +struct NTFS_DE_O { + struct NTFS_DE de; + struct GUID ObjId; // 0x10: Unique Id assigned to file + struct MFT_REF ref; // 0x20: MFT record number with this file + struct GUID BirthVolumeId; // 0x28: Birth Volume Id is the Object Id of the Volume on + // which the Object Id was allocated. It never changes + struct GUID BirthObjectId; // 0x38: Birth Object Id is the first Object Id that was + // ever assigned to this MFT Record. I.e. If the Object Id + // is changed for some reason, this field will reflect the + // original value of the Object Id. + // This field is valid if data_size == 0x48 + struct GUID BirthDomainId; // 0x48: Domain Id is currently unused but it is intended + // to be used in a network environment where the local + // machine is part of a Windows 2000 Domain. This may be + // used in a Windows 2000 Advanced Server managed domain. +}; + +static_assert(sizeof(struct NTFS_DE_O) == 0x58); + +#define NTFS_OBJECT_ENTRY_DATA_SIZE1 \ + 0x38 // struct NTFS_DE_O.BirthDomainId is not used +#define NTFS_OBJECT_ENTRY_DATA_SIZE2 \ + 0x48 // struct NTFS_DE_O.BirthDomainId is used + +/* Q Directory entry structure ( rule = 0x11 ) */ +struct NTFS_DE_Q { + struct NTFS_DE de; + __le32 owner_id; // 0x10: Unique Id assigned to file + __le32 Version; // 0x14: 0x02 + __le32 flags2; // 0x18: Quota flags, see above + __le64 BytesUsed; // 0x1C: + __le64 ChangeTime; // 0x24: + __le64 WarningLimit; // 0x28: + __le64 HardLimit; // 0x34: + __le64 ExceededTime; // 0x3C: + + // SID is placed here +}; // sizeof() = 0x44 + +#define SIZEOF_NTFS_DE_Q 0x44 + +#define SecurityDescriptorsBlockSize 0x40000 // 256K +#define SecurityDescriptorMaxSize 0x20000 // 128K +#define Log2OfSecurityDescriptorsBlockSize 18 + +struct SECURITY_KEY { + __le32 hash; // Hash value for descriptor + __le32 sec_id; // Security Id (guaranteed unique) +}; + +/* Security descriptors (the content of $Secure::SDS data stream) */ +struct SECURITY_HDR { + struct SECURITY_KEY key; // 0x00: Security Key + __le64 off; // 0x08: Offset of this entry in the file + __le32 size; // 0x10: Size of this entry, 8 byte aligned + // + // Security descriptor itself is placed here + // Total size is 16 byte aligned + // +} __packed; + +#define SIZEOF_SECURITY_HDR 0x14 + +/* SII Directory entry structure */ +struct NTFS_DE_SII { + struct NTFS_DE de; + __le32 sec_id; // 0x10: Key: sizeof(security_id) = wKeySize + struct SECURITY_HDR sec_hdr; // 0x14: +} __packed; + +#define SIZEOF_SII_DIRENTRY 0x28 + +/* SDH Directory entry structure */ +struct NTFS_DE_SDH { + struct NTFS_DE de; + struct SECURITY_KEY key; // 0x10: Key + struct SECURITY_HDR sec_hdr; // 0x18: Data + __le16 magic[2]; // 0x2C: 0x00490049 "I I" +}; + +#define SIZEOF_SDH_DIRENTRY 0x30 + +struct REPARSE_KEY { + __le32 ReparseTag; // 0x00: Reparse Tag + struct MFT_REF ref; // 0x04: MFT record number with this file +}; // sizeof() = 0x0C + +static_assert(offsetof(struct REPARSE_KEY, ref) == 0x04); +#define SIZEOF_REPARSE_KEY 0x0C + +/* Reparse Directory entry structure */ +struct NTFS_DE_R { + struct NTFS_DE de; + struct REPARSE_KEY key; // 0x10: Reparse Key + u32 zero; // 0x1c +}; // sizeof() = 0x20 + +static_assert(sizeof(struct NTFS_DE_R) == 0x20); + +/* CompressReparseBuffer.WofVersion */ +#define WOF_CURRENT_VERSION cpu_to_le32(1) +/* CompressReparseBuffer.WofProvider */ +#define WOF_PROVIDER_WIM cpu_to_le32(1) +/* CompressReparseBuffer.WofProvider */ +#define WOF_PROVIDER_SYSTEM cpu_to_le32(2) +/* CompressReparseBuffer.ProviderVer */ +#define WOF_PROVIDER_CURRENT_VERSION cpu_to_le32(1) + +#define WOF_COMPRESSION_XPRESS4K cpu_to_le32(0) // 4k +#define WOF_COMPRESSION_LZX32K cpu_to_le32(1) // 32k +#define WOF_COMPRESSION_XPRESS8K cpu_to_le32(2) // 8k +#define WOF_COMPRESSION_XPRESS16K cpu_to_le32(3) // 16k + +/* + * ATTR_REPARSE (0xC0) + * + * The reparse struct GUID structure is used by all 3rd party layered drivers to + * store data in a reparse point. For non-Microsoft tags, The struct GUID field + * cannot be GUID_NULL. + * The constraints on reparse tags are defined below. + * Microsoft tags can also be used with this format of the reparse point buffer. + */ +struct REPARSE_POINT { + __le32 ReparseTag; // 0x00: + __le16 ReparseDataLength;// 0x04: + __le16 Reserved; + + struct GUID Guid; // 0x08: + + // + // Here GenericReparseBuffer is placed + // +}; + +static_assert(sizeof(struct REPARSE_POINT) == 0x18); + +// +// Maximum allowed size of the reparse data. +// +#define MAXIMUM_REPARSE_DATA_BUFFER_SIZE (16 * 1024) + +// +// The value of the following constant needs to satisfy the following +// conditions: +// (1) Be at least as large as the largest of the reserved tags. +// (2) Be strictly smaller than all the tags in use. +// +#define IO_REPARSE_TAG_RESERVED_RANGE 1 + +// +// The reparse tags are a ULONG. The 32 bits are laid out as follows: +// +// 3 3 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 1 1 1 +// 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 +// +-+-+-+-+-----------------------+-------------------------------+ +// |M|R|N|R| Reserved bits | Reparse Tag Value | +// +-+-+-+-+-----------------------+-------------------------------+ +// +// M is the Microsoft bit. When set to 1, it denotes a tag owned by Microsoft. +// All ISVs must use a tag with a 0 in this position. +// Note: If a Microsoft tag is used by non-Microsoft software, the +// behavior is not defined. +// +// R is reserved. Must be zero for non-Microsoft tags. +// +// N is name surrogate. When set to 1, the file represents another named +// entity in the system. +// +// The M and N bits are OR-able. +// The following macros check for the M and N bit values: +// + +// +// Macro to determine whether a reparse point tag corresponds to a tag +// owned by Microsoft. +// +#define IsReparseTagMicrosoft(_tag) (((_tag)&IO_REPARSE_TAG_MICROSOFT)) + +// +// Macro to determine whether a reparse point tag is a name surrogate +// +#define IsReparseTagNameSurrogate(_tag) (((_tag)&IO_REPARSE_TAG_NAME_SURROGATE)) + +// +// The following constant represents the bits that are valid to use in +// reparse tags. +// +#define IO_REPARSE_TAG_VALID_VALUES 0xF000FFFF + +// +// Macro to determine whether a reparse tag is a valid tag. +// +#define IsReparseTagValid(_tag) \ + (!((_tag) & ~IO_REPARSE_TAG_VALID_VALUES) && \ + ((_tag) > IO_REPARSE_TAG_RESERVED_RANGE)) + +// +// Microsoft tags for reparse points. +// + +enum IO_REPARSE_TAG { + IO_REPARSE_TAG_SYMBOLIC_LINK = cpu_to_le32(0), + IO_REPARSE_TAG_NAME_SURROGATE = cpu_to_le32(0x20000000), + IO_REPARSE_TAG_MICROSOFT = cpu_to_le32(0x80000000), + IO_REPARSE_TAG_MOUNT_POINT = cpu_to_le32(0xA0000003), + IO_REPARSE_TAG_SYMLINK = cpu_to_le32(0xA000000C), + IO_REPARSE_TAG_HSM = cpu_to_le32(0xC0000004), + IO_REPARSE_TAG_SIS = cpu_to_le32(0x80000007), + IO_REPARSE_TAG_DEDUP = cpu_to_le32(0x80000013), + IO_REPARSE_TAG_COMPRESS = cpu_to_le32(0x80000017), + + // + // The reparse tag 0x80000008 is reserved for Microsoft internal use + // (may be published in the future) + // + + // + // Microsoft reparse tag reserved for DFS + // + IO_REPARSE_TAG_DFS = cpu_to_le32(0x8000000A), + + // + // Microsoft reparse tag reserved for the file system filter manager + // + IO_REPARSE_TAG_FILTER_MANAGER = cpu_to_le32(0x8000000B), + + // + // Non-Microsoft tags for reparse points + // + + // + // Tag allocated to CONGRUENT, May 2000. Used by IFSTEST + // + IO_REPARSE_TAG_IFSTEST_CONGRUENT = cpu_to_le32(0x00000009), + + // + // Tag allocated to ARKIVIO + // + IO_REPARSE_TAG_ARKIVIO = cpu_to_le32(0x0000000C), + + // + // Tag allocated to SOLUTIONSOFT + // + IO_REPARSE_TAG_SOLUTIONSOFT = cpu_to_le32(0x2000000D), + + // + // Tag allocated to COMMVAULT + // + IO_REPARSE_TAG_COMMVAULT = cpu_to_le32(0x0000000E), + + // OneDrive?? + IO_REPARSE_TAG_CLOUD = cpu_to_le32(0x9000001A), + IO_REPARSE_TAG_CLOUD_1 = cpu_to_le32(0x9000101A), + IO_REPARSE_TAG_CLOUD_2 = cpu_to_le32(0x9000201A), + IO_REPARSE_TAG_CLOUD_3 = cpu_to_le32(0x9000301A), + IO_REPARSE_TAG_CLOUD_4 = cpu_to_le32(0x9000401A), + IO_REPARSE_TAG_CLOUD_5 = cpu_to_le32(0x9000501A), + IO_REPARSE_TAG_CLOUD_6 = cpu_to_le32(0x9000601A), + IO_REPARSE_TAG_CLOUD_7 = cpu_to_le32(0x9000701A), + IO_REPARSE_TAG_CLOUD_8 = cpu_to_le32(0x9000801A), + IO_REPARSE_TAG_CLOUD_9 = cpu_to_le32(0x9000901A), + IO_REPARSE_TAG_CLOUD_A = cpu_to_le32(0x9000A01A), + IO_REPARSE_TAG_CLOUD_B = cpu_to_le32(0x9000B01A), + IO_REPARSE_TAG_CLOUD_C = cpu_to_le32(0x9000C01A), + IO_REPARSE_TAG_CLOUD_D = cpu_to_le32(0x9000D01A), + IO_REPARSE_TAG_CLOUD_E = cpu_to_le32(0x9000E01A), + IO_REPARSE_TAG_CLOUD_F = cpu_to_le32(0x9000F01A), + +}; + +#define SYMLINK_FLAG_RELATIVE 1 + +/* Microsoft reparse buffer. (see DDK for details) */ +struct REPARSE_DATA_BUFFER { + __le32 ReparseTag; // 0x00: + __le16 ReparseDataLength; // 0x04: + __le16 Reserved; + + union { + // If ReparseTag == 0xA0000003 (IO_REPARSE_TAG_MOUNT_POINT) + struct { + __le16 SubstituteNameOffset; // 0x08 + __le16 SubstituteNameLength; // 0x0A + __le16 PrintNameOffset; // 0x0C + __le16 PrintNameLength; // 0x0E + __le16 PathBuffer[1]; // 0x10 + } MountPointReparseBuffer; + + // If ReparseTag == 0xA000000C (IO_REPARSE_TAG_SYMLINK) + // https://msdn.microsoft.com/en-us/library/cc232006.aspx + struct { + __le16 SubstituteNameOffset; // 0x08 + __le16 SubstituteNameLength; // 0x0A + __le16 PrintNameOffset; // 0x0C + __le16 PrintNameLength; // 0x0E + // 0-absolute path 1- relative path, SYMLINK_FLAG_RELATIVE + __le32 Flags; // 0x10 + __le16 PathBuffer[1]; // 0x14 + } SymbolicLinkReparseBuffer; + + // If ReparseTag == 0x80000017U + struct { + __le32 WofVersion; // 0x08 == 1 + /* 1 - WIM backing provider ("WIMBoot"), + * 2 - System compressed file provider + */ + __le32 WofProvider; // 0x0C + __le32 ProviderVer; // 0x10: == 1 WOF_FILE_PROVIDER_CURRENT_VERSION == 1 + __le32 CompressionFormat; // 0x14: 0, 1, 2, 3. See WOF_COMPRESSION_XXX + } CompressReparseBuffer; + + struct { + u8 DataBuffer[1]; // 0x08 + } GenericReparseBuffer; + }; +}; + +/* ATTR_EA_INFO (0xD0) */ + +#define FILE_NEED_EA 0x80 // See ntifs.h +/* FILE_NEED_EA, indicates that the file to which the EA belongs cannot be + * interpreted without understanding the associated extended attributes. + */ +struct EA_INFO { + __le16 size_pack; // 0x00: Size of buffer to hold in packed form + __le16 count; // 0x02: Count of EA's with FILE_NEED_EA bit set + __le32 size; // 0x04: Size of buffer to hold in unpacked form +}; + +static_assert(sizeof(struct EA_INFO) == 8); + +/* ATTR_EA (0xE0) */ +struct EA_FULL { + __le32 size; // 0x00: (not in packed) + u8 flags; // 0x04 + u8 name_len; // 0x05 + __le16 elength; // 0x06 + u8 name[1]; // 0x08 +}; + +static_assert(offsetof(struct EA_FULL, name) == 8); + +#define ACL_REVISION 2 +#define ACL_REVISION_DS 4 + +#define SE_SELF_RELATIVE cpu_to_le16(0x8000) + +struct SECURITY_DESCRIPTOR_RELATIVE { + u8 Revision; + u8 Sbz1; + __le16 Control; + __le32 Owner; + __le32 Group; + __le32 Sacl; + __le32 Dacl; +}; +static_assert(sizeof(struct SECURITY_DESCRIPTOR_RELATIVE) == 0x14); + +struct ACE_HEADER { + u8 AceType; + u8 AceFlags; + __le16 AceSize; +}; +static_assert(sizeof(struct ACE_HEADER) == 4); + +struct ACL { + u8 AclRevision; + u8 Sbz1; + __le16 AclSize; + __le16 AceCount; + __le16 Sbz2; +}; +static_assert(sizeof(struct ACL) == 8); + +struct SID { + u8 Revision; + u8 SubAuthorityCount; + u8 IdentifierAuthority[6]; + __le32 SubAuthority[1]; +}; +static_assert(offsetof(struct SID, SubAuthority) == 8); + +// clang-format on diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h new file mode 100644 index 000000000000..60986dbc01a7 --- /dev/null +++ b/fs/ntfs3/ntfs_fs.h @@ -0,0 +1,1082 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +// clang-format off +#define MINUS_ONE_T ((size_t)(-1)) +/* Biggest MFT / smallest cluster */ +#define MAXIMUM_BYTES_PER_MFT 4096 +#define NTFS_BLOCKS_PER_MFT_RECORD (MAXIMUM_BYTES_PER_MFT / 512) + +#define MAXIMUM_BYTES_PER_INDEX 4096 +#define NTFS_BLOCKS_PER_INODE (MAXIMUM_BYTES_PER_INDEX / 512) + +/* ntfs specific error code when fixup failed*/ +#define E_NTFS_FIXUP 555 +/* ntfs specific error code about resident->nonresident*/ +#define E_NTFS_NONRESIDENT 556 + +/* sbi->flags */ +#define NTFS_FLAGS_NODISCARD 0x00000001 +#define NTFS_FLAGS_NEED_REPLAY 0x04000000 + +/* ni->ni_flags */ +/* + * Data attribute is external compressed (lzx/xpress) + * 1 - WOF_COMPRESSION_XPRESS4K + * 2 - WOF_COMPRESSION_XPRESS8K + * 3 - WOF_COMPRESSION_XPRESS16K + * 4 - WOF_COMPRESSION_LZX32K + */ +#define NI_FLAG_COMPRESSED_MASK 0x0000000f +/* Data attribute is deduplicated */ +#define NI_FLAG_DEDUPLICATED 0x00000010 +#define NI_FLAG_EA 0x00000020 +#define NI_FLAG_DIR 0x00000040 +#define NI_FLAG_RESIDENT 0x00000080 +#define NI_FLAG_UPDATE_PARENT 0x00000100 +// clang-format on + +struct ntfs_mount_options { + struct nls_table *nls; + + kuid_t fs_uid; + kgid_t fs_gid; + u16 fs_fmask_inv; + u16 fs_dmask_inv; + + unsigned uid : 1, /* uid was set */ + gid : 1, /* gid was set */ + fmask : 1, /* fmask was set */ + dmask : 1, /*dmask was set*/ + sys_immutable : 1, /* immutable system files */ + discard : 1, /* issue discard requests on deletions */ + sparse : 1, /*create sparse files*/ + showmeta : 1, /*show meta files*/ + nohidden : 1, /*do not show hidden files*/ + force : 1, /*rw mount dirty volume*/ + no_acs_rules : 1, /*exclude acs rules*/ + prealloc : 1 /*preallocate space when file is growing*/ + ; +}; + +/* special value to unpack and deallocate*/ +#define RUN_DEALLOCATE ((struct runs_tree *)(size_t)1) + +/* TODO: use rb tree instead of array */ +struct runs_tree { + struct ntfs_run *runs; + size_t count; // Currently used size a ntfs_run storage. + size_t allocated; // Currently allocated ntfs_run storage size. +}; + +struct ntfs_buffers { + /* Biggest MFT / smallest cluster = 4096 / 512 = 8 */ + /* Biggest index / smallest cluster = 4096 / 512 = 8 */ + struct buffer_head *bh[PAGE_SIZE >> SECTOR_SHIFT]; + u32 bytes; + u32 nbufs; + u32 off; +}; + +enum ALLOCATE_OPT { + ALLOCATE_DEF = 0, // Allocate all clusters + ALLOCATE_MFT = 1, // Allocate for MFT +}; + +enum bitmap_mutex_classes { + BITMAP_MUTEX_CLUSTERS = 0, + BITMAP_MUTEX_MFT = 1, +}; + +struct wnd_bitmap { + struct super_block *sb; + struct rw_semaphore rw_lock; + + struct runs_tree run; + size_t nbits; + + size_t total_zeroes; // total number of free bits + u16 *free_bits; // free bits in each window + size_t nwnd; + u32 bits_last; // bits in last window + + struct rb_root start_tree; // extents, sorted by 'start' + struct rb_root count_tree; // extents, sorted by 'count + start' + size_t count; // extents count + + /* + * -1 Tree is activated but not updated (too many fragments) + * 0 - Tree is not activated + * 1 - Tree is activated and updated + */ + int uptodated; + size_t extent_min; // Minimal extent used while building + size_t extent_max; // Upper estimate of biggest free block + + /* Zone [bit, end) */ + size_t zone_bit; + size_t zone_end; + + bool set_tail; // not necessary in driver + bool inited; +}; + +typedef int (*NTFS_CMP_FUNC)(const void *key1, size_t len1, const void *key2, + size_t len2, const void *param); + +enum index_mutex_classed { + INDEX_MUTEX_I30 = 0, + INDEX_MUTEX_SII = 1, + INDEX_MUTEX_SDH = 2, + INDEX_MUTEX_SO = 3, + INDEX_MUTEX_SQ = 4, + INDEX_MUTEX_SR = 5, + INDEX_MUTEX_TOTAL +}; + +/* ntfs_index - allocation unit inside directory */ +struct ntfs_index { + struct runs_tree bitmap_run; + struct runs_tree alloc_run; + /* read/write access to 'bitmap_run'/'alloc_run' while ntfs_readdir */ + struct rw_semaphore run_lock; + + /*TODO: remove 'cmp'*/ + NTFS_CMP_FUNC cmp; + + u8 index_bits; // log2(root->index_block_size) + u8 idx2vbn_bits; // log2(root->index_block_clst) + u8 vbn2vbo_bits; // index_block_size < cluster? 9 : cluster_bits + u8 type; // index_mutex_classed +}; + +/* Set when LogFile is replaying */ +#define NTFS_FLAGS_LOG_REPLAYING 0x00000008 + +/* Set when we changed first MFT's which copy must be updated in $MftMirr */ +#define NTFS_FLAGS_MFTMIRR 0x00001000 + +/* Minimum mft zone */ +#define NTFS_MIN_MFT_ZONE 100 + +/* ntfs file system in-core superblock data */ +struct ntfs_sb_info { + struct super_block *sb; + + u32 discard_granularity; + u64 discard_granularity_mask_inv; // ~(discard_granularity_mask_inv-1) + + u32 cluster_size; // bytes per cluster + u32 cluster_mask; // == cluster_size - 1 + u64 cluster_mask_inv; // ~(cluster_size - 1) + u32 block_mask; // sb->s_blocksize - 1 + u32 blocks_per_cluster; // cluster_size / sb->s_blocksize + + u32 record_size; + u32 sector_size; + u32 index_size; + + u8 sector_bits; + u8 cluster_bits; + u8 record_bits; + + u64 maxbytes; // Maximum size for normal files + u64 maxbytes_sparse; // Maximum size for sparse file + + u32 flags; // See NTFS_FLAGS_XXX + + CLST bad_clusters; // The count of marked bad clusters + + u16 max_bytes_per_attr; // maximum attribute size in record + u16 attr_size_tr; // attribute size threshold (320 bytes) + + /* Records in $Extend */ + CLST objid_no; + CLST quota_no; + CLST reparse_no; + CLST usn_jrnl_no; + + struct ATTR_DEF_ENTRY *def_table; // attribute definition table + u32 def_entries; + u32 ea_max_size; + + struct MFT_REC *new_rec; + + u16 *upcase; + + struct { + u64 lbo, lbo2; + struct ntfs_inode *ni; + struct wnd_bitmap bitmap; // $MFT::Bitmap + /* + * MFT records [11-24) used to expand MFT itself + * They always marked as used in $MFT::Bitmap + * 'reserved_bitmap' contains real bitmap of these records + */ + ulong reserved_bitmap; // bitmap of used records [11 - 24) + size_t next_free; // The next record to allocate from + size_t used; // mft valid size in records + u32 recs_mirr; // Number of records in MFTMirr + u8 next_reserved; + u8 reserved_bitmap_inited; + } mft; + + struct { + struct wnd_bitmap bitmap; // $Bitmap::Data + CLST next_free_lcn; + } used; + + struct { + u64 size; // in bytes + u64 blocks; // in blocks + u64 ser_num; + struct ntfs_inode *ni; + __le16 flags; // cached current VOLUME_INFO::flags, VOLUME_FLAG_DIRTY + u8 major_ver; + u8 minor_ver; + char label[65]; + bool real_dirty; /* real fs state*/ + } volume; + + struct { + struct ntfs_index index_sii; + struct ntfs_index index_sdh; + struct ntfs_inode *ni; + u32 next_id; + u64 next_off; + + __le32 def_security_id; + } security; + + struct { + struct ntfs_index index_r; + struct ntfs_inode *ni; + u64 max_size; // 16K + } reparse; + + struct { + struct ntfs_index index_o; + struct ntfs_inode *ni; + } objid; + + struct { + struct mutex mtx_lznt; + struct lznt *lznt; +#ifdef CONFIG_NTFS3_LZX_XPRESS + struct mutex mtx_xpress; + struct xpress_decompressor *xpress; + struct mutex mtx_lzx; + struct lzx_decompressor *lzx; +#endif + } compress; + + struct ntfs_mount_options options; + struct ratelimit_state msg_ratelimit; +}; + +/* + * one MFT record(usually 1024 bytes), consists of attributes + */ +struct mft_inode { + struct rb_node node; + struct ntfs_sb_info *sbi; + + struct MFT_REC *mrec; + struct ntfs_buffers nb; + + CLST rno; + bool dirty; +}; + +/* nested class for ntfs_inode::ni_lock */ +enum ntfs_inode_mutex_lock_class { + NTFS_INODE_MUTEX_DIRTY, + NTFS_INODE_MUTEX_SECURITY, + NTFS_INODE_MUTEX_OBJID, + NTFS_INODE_MUTEX_REPARSE, + NTFS_INODE_MUTEX_NORMAL, + NTFS_INODE_MUTEX_PARENT, +}; + +/* + * ntfs inode - extends linux inode. consists of one or more mft inodes + */ +struct ntfs_inode { + struct mft_inode mi; // base record + + /* + * Valid size: [0 - i_valid) - these range in file contains valid data + * Range [i_valid - inode->i_size) - contains 0 + * Usually i_valid <= inode->i_size + */ + u64 i_valid; + struct timespec64 i_crtime; + + struct mutex ni_lock; + + /* file attributes from std */ + enum FILE_ATTRIBUTE std_fa; + __le32 std_security_id; + + /* + * tree of mft_inode + * not empty when primary MFT record (usually 1024 bytes) can't save all attributes + * e.g. file becomes too fragmented or contains a lot of names + */ + struct rb_root mi_tree; + + /* + * This member is used in ntfs_readdir to ensure that all subrecords are loaded + */ + u8 mi_loaded; + + union { + struct ntfs_index dir; + struct { + struct rw_semaphore run_lock; + struct runs_tree run; +#ifdef CONFIG_NTFS3_LZX_XPRESS + struct page *offs_page; +#endif + } file; + }; + + struct { + struct runs_tree run; + struct ATTR_LIST_ENTRY *le; // 1K aligned memory + size_t size; + bool dirty; + } attr_list; + + size_t ni_flags; // NI_FLAG_XXX + + struct inode vfs_inode; +}; + +struct indx_node { + struct ntfs_buffers nb; + struct INDEX_BUFFER *index; +}; + +struct ntfs_fnd { + int level; + struct indx_node *nodes[20]; + struct NTFS_DE *de[20]; + struct NTFS_DE *root_de; +}; + +enum REPARSE_SIGN { + REPARSE_NONE = 0, + REPARSE_COMPRESSED = 1, + REPARSE_DEDUPLICATED = 2, + REPARSE_LINK = 3 +}; + +/* functions from attrib.c*/ +int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni, + struct runs_tree *run, const CLST *vcn); +int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run, + CLST vcn, CLST lcn, CLST len, CLST *pre_alloc, + enum ALLOCATE_OPT opt, CLST *alen, const size_t fr, + CLST *new_lcn); +int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY *le, struct mft_inode *mi, + u64 new_size, struct runs_tree *run, + struct ATTRIB **ins_attr, struct page *page); +int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 new_size, const u64 *new_valid, bool keep_prealloc, + struct ATTRIB **ret); +int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn, + CLST *len, bool *new); +int attr_data_read_resident(struct ntfs_inode *ni, struct page *page); +int attr_data_write_resident(struct ntfs_inode *ni, struct page *page); +int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + CLST vcn); +int attr_load_runs_range(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, struct runs_tree *run, + u64 from, u64 to); +int attr_wof_frame_info(struct ntfs_inode *ni, struct ATTRIB *attr, + struct runs_tree *run, u64 frame, u64 frames, + u8 frame_bits, u32 *ondisk_size, u64 *vbo_data); +int attr_is_frame_compressed(struct ntfs_inode *ni, struct ATTRIB *attr, + CLST frame, CLST *clst_data); +int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size, + u64 new_valid); +int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes); +int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes); + +/* functions from attrlist.c*/ +void al_destroy(struct ntfs_inode *ni); +bool al_verify(struct ntfs_inode *ni); +int ntfs_load_attr_list(struct ntfs_inode *ni, struct ATTRIB *attr); +struct ATTR_LIST_ENTRY *al_enumerate(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le); +struct ATTR_LIST_ENTRY *al_find_le(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le, + const struct ATTRIB *attr); +struct ATTR_LIST_ENTRY *al_find_ex(struct ntfs_inode *ni, + struct ATTR_LIST_ENTRY *le, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, const CLST *vcn); +int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name, + u8 name_len, CLST svcn, __le16 id, const struct MFT_REF *ref, + struct ATTR_LIST_ENTRY **new_le); +bool al_remove_le(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le); +bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, + const __le16 *name, size_t name_len, + const struct MFT_REF *ref); +int al_update(struct ntfs_inode *ni); +static inline size_t al_aligned(size_t size) +{ + return (size + 1023) & ~(size_t)1023; +} + +/* globals from bitfunc.c */ +bool are_bits_clear(const ulong *map, size_t bit, size_t nbits); +bool are_bits_set(const ulong *map, size_t bit, size_t nbits); +size_t get_set_bits_ex(const ulong *map, size_t bit, size_t nbits); + +/* globals from dir.c */ +int ntfs_utf16_to_nls(struct ntfs_sb_info *sbi, const struct le_str *uni, + u8 *buf, int buf_len); +int ntfs_nls_to_utf16(struct ntfs_sb_info *sbi, const u8 *name, u32 name_len, + struct cpu_str *uni, u32 max_ulen, + enum utf16_endian endian); +struct inode *dir_search_u(struct inode *dir, const struct cpu_str *uni, + struct ntfs_fnd *fnd); +bool dir_is_empty(struct inode *dir); +extern const struct file_operations ntfs_dir_operations; + +/* globals from file.c*/ +int ntfs_getattr(const struct path *path, struct kstat *stat, u32 request_mask, + u32 flags); +void ntfs_sparse_cluster(struct inode *inode, struct page *page0, CLST vcn, + CLST len); +int ntfs3_setattr(struct dentry *dentry, struct iattr *attr); +int ntfs_file_open(struct inode *inode, struct file *file); +int ntfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, + __u64 start, __u64 len); +extern const struct inode_operations ntfs_special_inode_operations; +extern const struct inode_operations ntfs_file_inode_operations; +extern const struct file_operations ntfs_file_operations; + +/* globals from frecord.c */ +void ni_remove_mi(struct ntfs_inode *ni, struct mft_inode *mi); +struct ATTR_STD_INFO *ni_std(struct ntfs_inode *ni); +struct ATTR_STD_INFO5 *ni_std5(struct ntfs_inode *ni); +void ni_clear(struct ntfs_inode *ni); +int ni_load_mi_ex(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi); +int ni_load_mi(struct ntfs_inode *ni, struct ATTR_LIST_ENTRY *le, + struct mft_inode **mi); +struct ATTRIB *ni_find_attr(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY **entry_o, + enum ATTR_TYPE type, const __le16 *name, + u8 name_len, const CLST *vcn, + struct mft_inode **mi); +struct ATTRIB *ni_enum_attr_ex(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY **le, + struct mft_inode **mi); +struct ATTRIB *ni_load_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, CLST vcn, + struct mft_inode **pmi); +int ni_load_all_mi(struct ntfs_inode *ni); +bool ni_add_subrecord(struct ntfs_inode *ni, CLST rno, struct mft_inode **mi); +int ni_remove_attr(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, size_t name_len, bool base_only, + const __le16 *id); +int ni_create_attr_list(struct ntfs_inode *ni); +int ni_expand_list(struct ntfs_inode *ni); +int ni_insert_nonresident(struct ntfs_inode *ni, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, + const struct runs_tree *run, CLST svcn, CLST len, + __le16 flags, struct ATTRIB **new_attr, + struct mft_inode **mi); +int ni_insert_resident(struct ntfs_inode *ni, u32 data_size, + enum ATTR_TYPE type, const __le16 *name, u8 name_len, + struct ATTRIB **new_attr, struct mft_inode **mi); +int ni_remove_attr_le(struct ntfs_inode *ni, struct ATTRIB *attr, + struct ATTR_LIST_ENTRY *le); +int ni_delete_all(struct ntfs_inode *ni); +struct ATTR_FILE_NAME *ni_fname_name(struct ntfs_inode *ni, + const struct cpu_str *uni, + const struct MFT_REF *home, + struct ATTR_LIST_ENTRY **entry); +struct ATTR_FILE_NAME *ni_fname_type(struct ntfs_inode *ni, u8 name_type, + struct ATTR_LIST_ENTRY **entry); +int ni_new_attr_flags(struct ntfs_inode *ni, enum FILE_ATTRIBUTE new_fa); +enum REPARSE_SIGN ni_parse_reparse(struct ntfs_inode *ni, struct ATTRIB *attr, + void *buffer); +int ni_write_inode(struct inode *inode, int sync, const char *hint); +#define _ni_write_inode(i, w) ni_write_inode(i, w, __func__) +int ni_fiemap(struct ntfs_inode *ni, struct fiemap_extent_info *fieinfo, + __u64 vbo, __u64 len); +int ni_readpage_cmpr(struct ntfs_inode *ni, struct page *page); +int ni_decompress_file(struct ntfs_inode *ni); +int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages, + u32 pages_per_frame); +int ni_write_frame(struct ntfs_inode *ni, struct page **pages, + u32 pages_per_frame); + +/* globals from fslog.c */ +int log_replay(struct ntfs_inode *ni, bool *initialized); + +/* globals from fsntfs.c */ +bool ntfs_fix_pre_write(struct NTFS_RECORD_HEADER *rhdr, size_t bytes); +int ntfs_fix_post_read(struct NTFS_RECORD_HEADER *rhdr, size_t bytes, + bool simple); +int ntfs_extend_init(struct ntfs_sb_info *sbi); +int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi); +const struct ATTR_DEF_ENTRY *ntfs_query_def(struct ntfs_sb_info *sbi, + enum ATTR_TYPE Type); +int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len, + CLST *new_lcn, CLST *new_len, + enum ALLOCATE_OPT opt); +int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft, + struct ntfs_inode *ni, struct mft_inode **mi); +void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST nRecord); +int ntfs_clear_mft_tail(struct ntfs_sb_info *sbi, size_t from, size_t to); +int ntfs_refresh_zone(struct ntfs_sb_info *sbi); +int ntfs_update_mftmirr(struct ntfs_sb_info *sbi, int wait); +enum NTFS_DIRTY_FLAGS { + NTFS_DIRTY_CLEAR = 0, + NTFS_DIRTY_DIRTY = 1, + NTFS_DIRTY_ERROR = 2, +}; +int ntfs_set_state(struct ntfs_sb_info *sbi, enum NTFS_DIRTY_FLAGS dirty); +int ntfs_sb_read(struct super_block *sb, u64 lbo, size_t bytes, void *buffer); +int ntfs_sb_write(struct super_block *sb, u64 lbo, size_t bytes, + const void *buffer, int wait); +int ntfs_sb_write_run(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, const void *buf, size_t bytes); +struct buffer_head *ntfs_bread_run(struct ntfs_sb_info *sbi, + const struct runs_tree *run, u64 vbo); +int ntfs_read_run_nb(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, void *buf, u32 bytes, struct ntfs_buffers *nb); +int ntfs_read_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, + struct NTFS_RECORD_HEADER *rhdr, u32 bytes, + struct ntfs_buffers *nb); +int ntfs_get_bh(struct ntfs_sb_info *sbi, const struct runs_tree *run, u64 vbo, + u32 bytes, struct ntfs_buffers *nb); +int ntfs_write_bh(struct ntfs_sb_info *sbi, struct NTFS_RECORD_HEADER *rhdr, + struct ntfs_buffers *nb, int sync); +int ntfs_bio_pages(struct ntfs_sb_info *sbi, const struct runs_tree *run, + struct page **pages, u32 nr_pages, u64 vbo, u32 bytes, + u32 op); +int ntfs_bio_fill_1(struct ntfs_sb_info *sbi, const struct runs_tree *run); +int ntfs_vbo_to_lbo(struct ntfs_sb_info *sbi, const struct runs_tree *run, + u64 vbo, u64 *lbo, u64 *bytes); +struct ntfs_inode *ntfs_new_inode(struct ntfs_sb_info *sbi, CLST nRec, + bool dir); +extern const u8 s_default_security[0x50]; +bool is_sd_valid(const struct SECURITY_DESCRIPTOR_RELATIVE *sd, u32 len); +int ntfs_security_init(struct ntfs_sb_info *sbi); +int ntfs_get_security_by_id(struct ntfs_sb_info *sbi, __le32 security_id, + struct SECURITY_DESCRIPTOR_RELATIVE **sd, + size_t *size); +int ntfs_insert_security(struct ntfs_sb_info *sbi, + const struct SECURITY_DESCRIPTOR_RELATIVE *sd, + u32 size, __le32 *security_id, bool *inserted); +int ntfs_reparse_init(struct ntfs_sb_info *sbi); +int ntfs_objid_init(struct ntfs_sb_info *sbi); +int ntfs_objid_remove(struct ntfs_sb_info *sbi, struct GUID *guid); +int ntfs_insert_reparse(struct ntfs_sb_info *sbi, __le32 rtag, + const struct MFT_REF *ref); +int ntfs_remove_reparse(struct ntfs_sb_info *sbi, __le32 rtag, + const struct MFT_REF *ref); +void mark_as_free_ex(struct ntfs_sb_info *sbi, CLST lcn, CLST len, bool trim); +int run_deallocate(struct ntfs_sb_info *sbi, struct runs_tree *run, bool trim); + +/* globals from index.c */ +int indx_used_bit(struct ntfs_index *indx, struct ntfs_inode *ni, size_t *bit); +void fnd_clear(struct ntfs_fnd *fnd); +static inline struct ntfs_fnd *fnd_get(void) +{ + return ntfs_zalloc(sizeof(struct ntfs_fnd)); +} +static inline void fnd_put(struct ntfs_fnd *fnd) +{ + if (fnd) { + fnd_clear(fnd); + ntfs_free(fnd); + } +} +void indx_clear(struct ntfs_index *idx); +int indx_init(struct ntfs_index *indx, struct ntfs_sb_info *sbi, + const struct ATTRIB *attr, enum index_mutex_classed type); +struct INDEX_ROOT *indx_get_root(struct ntfs_index *indx, struct ntfs_inode *ni, + struct ATTRIB **attr, struct mft_inode **mi); +int indx_read(struct ntfs_index *idx, struct ntfs_inode *ni, CLST vbn, + struct indx_node **node); +int indx_find(struct ntfs_index *indx, struct ntfs_inode *dir, + const struct INDEX_ROOT *root, const void *Key, size_t KeyLen, + const void *param, int *diff, struct NTFS_DE **entry, + struct ntfs_fnd *fnd); +int indx_find_sort(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct INDEX_ROOT *root, struct NTFS_DE **entry, + struct ntfs_fnd *fnd); +int indx_find_raw(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct INDEX_ROOT *root, struct NTFS_DE **entry, + size_t *off, struct ntfs_fnd *fnd); +int indx_insert_entry(struct ntfs_index *indx, struct ntfs_inode *ni, + const struct NTFS_DE *new_de, const void *param, + struct ntfs_fnd *fnd); +int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni, + const void *key, u32 key_len, const void *param); +int indx_update_dup(struct ntfs_inode *ni, struct ntfs_sb_info *sbi, + const struct ATTR_FILE_NAME *fname, + const struct NTFS_DUP_INFO *dup, int sync); + +/* globals from inode.c */ +struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref, + const struct cpu_str *name); +int ntfs_set_size(struct inode *inode, u64 new_size); +int reset_log_file(struct inode *inode); +int ntfs_get_block(struct inode *inode, sector_t vbn, + struct buffer_head *bh_result, int create); +int ntfs3_write_inode(struct inode *inode, struct writeback_control *wbc); +int ntfs_sync_inode(struct inode *inode); +int ntfs_flush_inodes(struct super_block *sb, struct inode *i1, + struct inode *i2); +int inode_write_data(struct inode *inode, const void *data, size_t bytes); +struct inode *ntfs_create_inode(struct inode *dir, struct dentry *dentry, + const struct cpu_str *uni, umode_t mode, + dev_t dev, const char *symname, u32 size, + int excl, struct ntfs_fnd *fnd); +int ntfs_link_inode(struct inode *inode, struct dentry *dentry); +int ntfs_unlink_inode(struct inode *dir, const struct dentry *dentry); +void ntfs_evict_inode(struct inode *inode); +extern const struct inode_operations ntfs_link_inode_operations; +extern const struct address_space_operations ntfs_aops; +extern const struct address_space_operations ntfs_aops_cmpr; + +/* globals from name_i.c*/ +int fill_name_de(struct ntfs_sb_info *sbi, void *buf, const struct qstr *name, + const struct cpu_str *uni); +struct dentry *ntfs3_get_parent(struct dentry *child); + +extern const struct inode_operations ntfs_dir_inode_operations; + +/* globals from record.c */ +int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi); +void mi_put(struct mft_inode *mi); +int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno); +int mi_read(struct mft_inode *mi, bool is_mft); +struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr); +// TODO: id? +struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, + enum ATTR_TYPE type, const __le16 *name, + size_t name_len, const __le16 *id); +static inline struct ATTRIB *rec_find_attr_le(struct mft_inode *rec, + struct ATTR_LIST_ENTRY *le) +{ + return mi_find_attr(rec, NULL, le->type, le_name(le), le->name_len, + &le->id); +} +int mi_write(struct mft_inode *mi, int wait); +int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, + __le16 flags, bool is_mft); +void mi_mark_free(struct mft_inode *mi); +struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, u32 asize, + u16 name_off); + +bool mi_remove_attr(struct mft_inode *mi, struct ATTRIB *attr); +bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes); +int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr, + struct runs_tree *run, CLST len); +static inline bool mi_is_ref(const struct mft_inode *mi, + const struct MFT_REF *ref) +{ + if (le32_to_cpu(ref->low) != mi->rno) + return false; + if (ref->seq != mi->mrec->seq) + return false; + +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + return le16_to_cpu(ref->high) == (mi->rno >> 32); +#else + return !ref->high; +#endif +} + +static inline void mi_get_ref(const struct mft_inode *mi, struct MFT_REF *ref) +{ + ref->low = cpu_to_le32(mi->rno); +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + ref->high = cpu_to_le16(mi->rno >> 32); +#else + ref->high = 0; +#endif + ref->seq = mi->mrec->seq; +} + +/* globals from run.c */ +bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn, + CLST *len, size_t *index); +void run_truncate(struct runs_tree *run, CLST vcn); +void run_truncate_head(struct runs_tree *run, CLST vcn); +void run_truncate_around(struct runs_tree *run, CLST vcn); +bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *Index); +bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len, + bool is_mft); +bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len); +bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn, + CLST *lcn, CLST *len); +bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn); + +int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, + u32 run_buf_size, CLST *packed_vcns); +int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, + u32 run_buf_size); + +#ifdef NTFS3_CHECK_FREE_CLST +int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, + u32 run_buf_size); +#else +#define run_unpack_ex run_unpack +#endif +int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn); + +/* globals from super.c */ +void *ntfs_set_shared(void *ptr, u32 bytes); +void *ntfs_put_shared(void *ptr); +void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len); +int ntfs_discard(struct ntfs_sb_info *sbi, CLST Lcn, CLST Len); + +/* globals from bitmap.c*/ +int __init ntfs3_init_bitmap(void); +void ntfs3_exit_bitmap(void); +void wnd_close(struct wnd_bitmap *wnd); +static inline size_t wnd_zeroes(const struct wnd_bitmap *wnd) +{ + return wnd->total_zeroes; +} +int wnd_init(struct wnd_bitmap *wnd, struct super_block *sb, size_t nbits); +int wnd_set_free(struct wnd_bitmap *wnd, size_t bit, size_t bits); +int wnd_set_used(struct wnd_bitmap *wnd, size_t bit, size_t bits); +bool wnd_is_free(struct wnd_bitmap *wnd, size_t bit, size_t bits); +bool wnd_is_used(struct wnd_bitmap *wnd, size_t bit, size_t bits); + +/* Possible values for 'flags' 'wnd_find' */ +#define BITMAP_FIND_MARK_AS_USED 0x01 +#define BITMAP_FIND_FULL 0x02 +size_t wnd_find(struct wnd_bitmap *wnd, size_t to_alloc, size_t hint, + size_t flags, size_t *allocated); +int wnd_extend(struct wnd_bitmap *wnd, size_t new_bits); +void wnd_zone_set(struct wnd_bitmap *wnd, size_t Lcn, size_t Len); +int ntfs_trim_fs(struct ntfs_sb_info *sbi, struct fstrim_range *range); + +/* globals from upcase.c */ +int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2, + const u16 *upcase, bool bothcase); +int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2, + const u16 *upcase, bool bothcase); + +/* globals from xattr.c */ +#ifdef CONFIG_NTFS3_FS_POSIX_ACL +struct posix_acl *ntfs_get_acl(struct inode *inode, int type); +int ntfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int ntfs_init_acl(struct inode *inode, struct inode *dir); +#else +#define ntfs_get_acl NULL +#define ntfs_set_acl NULL +#endif + +int ntfs_acl_chmod(struct inode *inode); +int ntfs_permission(struct inode *inode, int mask); +ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +extern const struct xattr_handler *ntfs_xattr_handlers[]; + +/* globals from lznt.c */ +struct lznt *get_lznt_ctx(int level); +size_t compress_lznt(const void *uncompressed, size_t uncompressed_size, + void *compressed, size_t compressed_size, + struct lznt *ctx); +ssize_t decompress_lznt(const void *compressed, size_t compressed_size, + void *uncompressed, size_t uncompressed_size); + +static inline bool is_ntfs3(struct ntfs_sb_info *sbi) +{ + return sbi->volume.major_ver >= 3; +} + +/*(sb->s_flags & SB_ACTIVE)*/ +static inline bool is_mounted(struct ntfs_sb_info *sbi) +{ + return !!sbi->sb->s_root; +} + +static inline bool ntfs_is_meta_file(struct ntfs_sb_info *sbi, CLST rno) +{ + return rno < MFT_REC_FREE || rno == sbi->objid_no || + rno == sbi->quota_no || rno == sbi->reparse_no || + rno == sbi->usn_jrnl_no; +} + +static inline void ntfs_unmap_page(struct page *page) +{ + kunmap(page); + put_page(page); +} + +static inline struct page *ntfs_map_page(struct address_space *mapping, + unsigned long index) +{ + struct page *page = read_mapping_page(mapping, index, NULL); + + if (!IS_ERR(page)) { + kmap(page); + if (!PageError(page)) + return page; + ntfs_unmap_page(page); + return ERR_PTR(-EIO); + } + return page; +} + +static inline size_t wnd_zone_bit(const struct wnd_bitmap *wnd) +{ + return wnd->zone_bit; +} + +static inline size_t wnd_zone_len(const struct wnd_bitmap *wnd) +{ + return wnd->zone_end - wnd->zone_bit; +} + +static inline void run_init(struct runs_tree *run) +{ + run->runs = NULL; + run->count = 0; + run->allocated = 0; +} + +static inline struct runs_tree *run_alloc(void) +{ + return ntfs_zalloc(sizeof(struct runs_tree)); +} + +static inline void run_close(struct runs_tree *run) +{ + ntfs_vfree(run->runs); + memset(run, 0, sizeof(*run)); +} + +static inline void run_free(struct runs_tree *run) +{ + if (run) { + ntfs_vfree(run->runs); + ntfs_free(run); + } +} + +static inline bool run_is_empty(struct runs_tree *run) +{ + return !run->count; +} + +/* NTFS uses quad aligned bitmaps */ +static inline size_t bitmap_size(size_t bits) +{ + return QuadAlign((bits + 7) >> 3); +} + +#define _100ns2seconds 10000000 +#define SecondsToStartOf1970 0x00000002B6109100 + +#define NTFS_TIME_GRAN 100 + +/* + * kernel2nt + * + * converts in-memory kernel timestamp into nt time + */ +static inline __le64 kernel2nt(const struct timespec64 *ts) +{ + // 10^7 units of 100 nanoseconds one second + return cpu_to_le64(_100ns2seconds * + (ts->tv_sec + SecondsToStartOf1970) + + ts->tv_nsec / NTFS_TIME_GRAN); +} + +/* + * nt2kernel + * + * converts on-disk nt time into kernel timestamp + */ +static inline void nt2kernel(const __le64 tm, struct timespec64 *ts) +{ + u64 t = le64_to_cpu(tm) - _100ns2seconds * SecondsToStartOf1970; + + // WARNING: do_div changes its first argument(!) + ts->tv_nsec = do_div(t, _100ns2seconds) * 100; + ts->tv_sec = t; +} + +static inline struct ntfs_sb_info *ntfs_sb(struct super_block *sb) +{ + return sb->s_fs_info; +} + +/* Align up on cluster boundary */ +static inline u64 ntfs_up_cluster(const struct ntfs_sb_info *sbi, u64 size) +{ + return (size + sbi->cluster_mask) & sbi->cluster_mask_inv; +} + +/* Align up on cluster boundary */ +static inline u64 ntfs_up_block(const struct super_block *sb, u64 size) +{ + return (size + sb->s_blocksize - 1) & ~(u64)(sb->s_blocksize - 1); +} + +static inline CLST bytes_to_cluster(const struct ntfs_sb_info *sbi, u64 size) +{ + return (size + sbi->cluster_mask) >> sbi->cluster_bits; +} + +static inline u64 bytes_to_block(const struct super_block *sb, u64 size) +{ + return (size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; +} + +static inline struct buffer_head *ntfs_bread(struct super_block *sb, + sector_t block) +{ + struct buffer_head *bh; + + bh = sb_bread(sb, block); + if (bh) + return bh; + + ntfs_err(sb, "failed to read volume at offset 0x%llx", + (u64)block << sb->s_blocksize_bits); + return NULL; +} + +static inline bool is_power_of2(size_t v) +{ + return v && !(v & (v - 1)); +} + +static inline struct ntfs_inode *ntfs_i(struct inode *inode) +{ + return container_of(inode, struct ntfs_inode, vfs_inode); +} + +static inline bool is_compressed(const struct ntfs_inode *ni) +{ + return (ni->std_fa & FILE_ATTRIBUTE_COMPRESSED) || + (ni->ni_flags & NI_FLAG_COMPRESSED_MASK); +} + +static inline int ni_ext_compress_bits(const struct ntfs_inode *ni) +{ + return 0xb + (ni->ni_flags & NI_FLAG_COMPRESSED_MASK); +} + +/* bits - 0xc, 0xd, 0xe, 0xf, 0x10 */ +static inline void ni_set_ext_compress_bits(struct ntfs_inode *ni, u8 bits) +{ + ni->ni_flags |= (bits - 0xb) & NI_FLAG_COMPRESSED_MASK; +} + +static inline bool is_dedup(const struct ntfs_inode *ni) +{ + return ni->ni_flags & NI_FLAG_DEDUPLICATED; +} + +static inline bool is_encrypted(const struct ntfs_inode *ni) +{ + return ni->std_fa & FILE_ATTRIBUTE_ENCRYPTED; +} + +static inline bool is_sparsed(const struct ntfs_inode *ni) +{ + return ni->std_fa & FILE_ATTRIBUTE_SPARSE_FILE; +} + +static inline int is_resident(struct ntfs_inode *ni) +{ + return ni->ni_flags & NI_FLAG_RESIDENT; +} + +static inline void le16_sub_cpu(__le16 *var, u16 val) +{ + *var = cpu_to_le16(le16_to_cpu(*var) - val); +} + +static inline void le32_sub_cpu(__le32 *var, u32 val) +{ + *var = cpu_to_le32(le32_to_cpu(*var) - val); +} + +static inline void nb_put(struct ntfs_buffers *nb) +{ + u32 i, nbufs = nb->nbufs; + + if (!nbufs) + return; + + for (i = 0; i < nbufs; i++) + put_bh(nb->bh[i]); + nb->nbufs = 0; +} + +static inline void put_indx_node(struct indx_node *in) +{ + if (!in) + return; + + ntfs_free(in->index); + nb_put(&in->nb); + ntfs_free(in); +} + +static inline void mi_clear(struct mft_inode *mi) +{ + nb_put(&mi->nb); + ntfs_free(mi->mrec); + mi->mrec = NULL; +} + +static inline void ni_lock(struct ntfs_inode *ni) +{ + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_NORMAL); +} + +static inline void ni_lock_dir(struct ntfs_inode *ni) +{ + mutex_lock_nested(&ni->ni_lock, NTFS_INODE_MUTEX_PARENT); +} + +static inline void ni_unlock(struct ntfs_inode *ni) +{ + mutex_unlock(&ni->ni_lock); +} + +static inline int ni_trylock(struct ntfs_inode *ni) +{ + return mutex_trylock(&ni->ni_lock); +} + +static inline int attr_load_runs_attr(struct ntfs_inode *ni, + struct ATTRIB *attr, + struct runs_tree *run, CLST vcn) +{ + return attr_load_runs_vcn(ni, attr->type, attr_name(attr), + attr->name_len, run, vcn); +} + +static inline void le64_sub_cpu(__le64 *var, u64 val) +{ + *var = cpu_to_le64(le64_to_cpu(*var) - val); +} diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c new file mode 100644 index 000000000000..0d4a6251bddc --- /dev/null +++ b/fs/ntfs3/record.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +static inline int compare_attr(const struct ATTRIB *left, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, + const u16 *upcase) +{ + /* First, compare the type codes: */ + int diff = le32_to_cpu(left->type) - le32_to_cpu(type); + + if (diff) + return diff; + + /* + * They have the same type code, so we have to compare the names. + */ + return ntfs_cmp_names(attr_name(left), left->name_len, name, name_len, + upcase, true); +} + +/* + * mi_new_attt_id + * + * returns unused attribute id that is less than mrec->next_attr_id + */ +static __le16 mi_new_attt_id(struct mft_inode *mi) +{ + u16 free_id, max_id, t16; + struct MFT_REC *rec = mi->mrec; + struct ATTRIB *attr; + __le16 id; + + id = rec->next_attr_id; + free_id = le16_to_cpu(id); + if (free_id < 0x7FFF) { + rec->next_attr_id = cpu_to_le16(free_id + 1); + return id; + } + + /* One record can store up to 1024/24 ~= 42 attributes */ + free_id = 0; + max_id = 0; + + attr = NULL; + + for (;;) { + attr = mi_enum_attr(mi, attr); + if (!attr) { + rec->next_attr_id = cpu_to_le16(max_id + 1); + mi->dirty = true; + return cpu_to_le16(free_id); + } + + t16 = le16_to_cpu(attr->id); + if (t16 == free_id) { + free_id += 1; + attr = NULL; + } else if (max_id < t16) + max_id = t16; + } +} + +int mi_get(struct ntfs_sb_info *sbi, CLST rno, struct mft_inode **mi) +{ + int err; + struct mft_inode *m = ntfs_zalloc(sizeof(struct mft_inode)); + + if (!m) + return -ENOMEM; + + err = mi_init(m, sbi, rno); + if (err) { + ntfs_free(m); + return err; + } + + err = mi_read(m, false); + if (err) { + mi_put(m); + return err; + } + + *mi = m; + return 0; +} + +void mi_put(struct mft_inode *mi) +{ + mi_clear(mi); + ntfs_free(mi); +} + +int mi_init(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno) +{ + mi->sbi = sbi; + mi->rno = rno; + mi->mrec = ntfs_malloc(sbi->record_size); + if (!mi->mrec) + return -ENOMEM; + + return 0; +} + +/* + * mi_read + * + * reads MFT data + */ +int mi_read(struct mft_inode *mi, bool is_mft) +{ + int err; + struct MFT_REC *rec = mi->mrec; + struct ntfs_sb_info *sbi = mi->sbi; + u32 bpr = sbi->record_size; + u64 vbo = (u64)mi->rno << sbi->record_bits; + struct ntfs_inode *mft_ni = sbi->mft.ni; + struct runs_tree *run = mft_ni ? &mft_ni->file.run : NULL; + struct rw_semaphore *rw_lock = NULL; + + if (is_mounted(sbi)) { + if (!is_mft) { + rw_lock = &mft_ni->file.run_lock; + down_read(rw_lock); + } + } + + err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb); + if (rw_lock) + up_read(rw_lock); + if (!err) + goto ok; + + if (err == -E_NTFS_FIXUP) { + mi->dirty = true; + goto ok; + } + + if (err != -ENOENT) + goto out; + + if (rw_lock) { + ni_lock(mft_ni); + down_write(rw_lock); + } + err = attr_load_runs_vcn(mft_ni, ATTR_DATA, NULL, 0, &mft_ni->file.run, + vbo >> sbi->cluster_bits); + if (rw_lock) { + up_write(rw_lock); + ni_unlock(mft_ni); + } + if (err) + goto out; + + if (rw_lock) + down_read(rw_lock); + err = ntfs_read_bh(sbi, run, vbo, &rec->rhdr, bpr, &mi->nb); + if (rw_lock) + up_read(rw_lock); + + if (err == -E_NTFS_FIXUP) { + mi->dirty = true; + goto ok; + } + if (err) + goto out; + +ok: + /* check field 'total' only here */ + if (le32_to_cpu(rec->total) != bpr) { + err = -EINVAL; + goto out; + } + + return 0; + +out: + return err; +} + +struct ATTRIB *mi_enum_attr(struct mft_inode *mi, struct ATTRIB *attr) +{ + const struct MFT_REC *rec = mi->mrec; + u32 used = le32_to_cpu(rec->used); + u32 t32, off, asize; + u16 t16; + + if (!attr) { + u32 total = le32_to_cpu(rec->total); + + off = le16_to_cpu(rec->attr_off); + + if (used > total) + return NULL; + + if (off >= used || off < MFTRECORD_FIXUP_OFFSET_1 || + !IsDwordAligned(off)) { + return NULL; + } + + /* Skip non-resident records */ + if (!is_rec_inuse(rec)) + return NULL; + + attr = Add2Ptr(rec, off); + } else { + /* Check if input attr inside record */ + off = PtrOffset(rec, attr); + if (off >= used) + return NULL; + + asize = le32_to_cpu(attr->size); + if (asize < SIZEOF_RESIDENT) { + /* Impossible 'cause we should not return such attribute */ + return NULL; + } + + attr = Add2Ptr(attr, asize); + off += asize; + } + + asize = le32_to_cpu(attr->size); + + /* Can we use the first field (attr->type) */ + if (off + 8 > used) { + static_assert(QuadAlign(sizeof(enum ATTR_TYPE)) == 8); + return NULL; + } + + if (attr->type == ATTR_END) { + /* end of enumeration */ + return NULL; + } + + /* 0x100 is last known attribute for now*/ + t32 = le32_to_cpu(attr->type); + if ((t32 & 0xf) || (t32 > 0x100)) + return NULL; + + /* Check boundary */ + if (off + asize > used) + return NULL; + + /* Check size of attribute */ + if (!attr->non_res) { + if (asize < SIZEOF_RESIDENT) + return NULL; + + t16 = le16_to_cpu(attr->res.data_off); + + if (t16 > asize) + return NULL; + + t32 = le32_to_cpu(attr->res.data_size); + if (t16 + t32 > asize) + return NULL; + + return attr; + } + + /* Check some nonresident fields */ + if (attr->name_len && + le16_to_cpu(attr->name_off) + sizeof(short) * attr->name_len > + le16_to_cpu(attr->nres.run_off)) { + return NULL; + } + + if (attr->nres.svcn || !is_attr_ext(attr)) { + if (asize + 8 < SIZEOF_NONRESIDENT) + return NULL; + + if (attr->nres.c_unit) + return NULL; + } else if (asize + 8 < SIZEOF_NONRESIDENT_EX) + return NULL; + + return attr; +} + +/* + * mi_find_attr + * + * finds the attribute by type and name and id + */ +struct ATTRIB *mi_find_attr(struct mft_inode *mi, struct ATTRIB *attr, + enum ATTR_TYPE type, const __le16 *name, + size_t name_len, const __le16 *id) +{ + u32 type_in = le32_to_cpu(type); + u32 atype; + +next_attr: + attr = mi_enum_attr(mi, attr); + if (!attr) + return NULL; + + atype = le32_to_cpu(attr->type); + if (atype > type_in) + return NULL; + + if (atype < type_in) + goto next_attr; + + if (attr->name_len != name_len) + goto next_attr; + + if (name_len && memcmp(attr_name(attr), name, name_len * sizeof(short))) + goto next_attr; + + if (id && *id != attr->id) + goto next_attr; + + return attr; +} + +int mi_write(struct mft_inode *mi, int wait) +{ + struct MFT_REC *rec; + int err; + struct ntfs_sb_info *sbi; + + if (!mi->dirty) + return 0; + + sbi = mi->sbi; + rec = mi->mrec; + + err = ntfs_write_bh(sbi, &rec->rhdr, &mi->nb, wait); + if (err) + return err; + + if (mi->rno < sbi->mft.recs_mirr) + sbi->flags |= NTFS_FLAGS_MFTMIRR; + + mi->dirty = false; + + return 0; +} + +int mi_format_new(struct mft_inode *mi, struct ntfs_sb_info *sbi, CLST rno, + __le16 flags, bool is_mft) +{ + int err; + u16 seq = 1; + struct MFT_REC *rec; + u64 vbo = (u64)rno << sbi->record_bits; + + err = mi_init(mi, sbi, rno); + if (err) + return err; + + rec = mi->mrec; + + if (rno == MFT_REC_MFT) { + ; + } else if (rno < MFT_REC_FREE) { + seq = rno; + } else if (rno >= sbi->mft.used) { + ; + } else if (mi_read(mi, is_mft)) { + ; + } else if (rec->rhdr.sign == NTFS_FILE_SIGNATURE) { + /* Record is reused. Update its sequence number */ + seq = le16_to_cpu(rec->seq) + 1; + if (!seq) + seq = 1; + } + + memcpy(rec, sbi->new_rec, sbi->record_size); + + rec->seq = cpu_to_le16(seq); + rec->flags = RECORD_FLAG_IN_USE | flags; + + mi->dirty = true; + + if (!mi->nb.nbufs) { + struct ntfs_inode *ni = sbi->mft.ni; + bool lock = false; + + if (is_mounted(sbi) && !is_mft) { + down_read(&ni->file.run_lock); + lock = true; + } + + err = ntfs_get_bh(sbi, &ni->file.run, vbo, sbi->record_size, + &mi->nb); + if (lock) + up_read(&ni->file.run_lock); + } + + return err; +} + +/* + * mi_mark_free + * + * marks record as unused and marks it as free in bitmap + */ +void mi_mark_free(struct mft_inode *mi) +{ + CLST rno = mi->rno; + struct ntfs_sb_info *sbi = mi->sbi; + + if (rno >= MFT_REC_RESERVED && rno < MFT_REC_FREE) { + ntfs_clear_mft_tail(sbi, rno, rno + 1); + mi->dirty = false; + return; + } + + if (mi->mrec) { + clear_rec_inuse(mi->mrec); + mi->dirty = true; + mi_write(mi, 0); + } + ntfs_mark_rec_free(sbi, rno); +} + +/* + * mi_insert_attr + * + * reserves space for new attribute + * returns not full constructed attribute or NULL if not possible to create + */ +struct ATTRIB *mi_insert_attr(struct mft_inode *mi, enum ATTR_TYPE type, + const __le16 *name, u8 name_len, u32 asize, + u16 name_off) +{ + size_t tail; + struct ATTRIB *attr; + __le16 id; + struct MFT_REC *rec = mi->mrec; + struct ntfs_sb_info *sbi = mi->sbi; + u32 used = le32_to_cpu(rec->used); + const u16 *upcase = sbi->upcase; + int diff; + + /* Can we insert mi attribute? */ + if (used + asize > mi->sbi->record_size) + return NULL; + + /* + * Scan through the list of attributes to find the point + * at which we should insert it. + */ + attr = NULL; + while ((attr = mi_enum_attr(mi, attr))) { + diff = compare_attr(attr, type, name, name_len, upcase); + if (diff > 0) + break; + if (diff < 0) + continue; + + if (!is_attr_indexed(attr)) + return NULL; + break; + } + + if (!attr) { + tail = 8; /* not used, just to suppress warning */ + attr = Add2Ptr(rec, used - 8); + } else { + tail = used - PtrOffset(rec, attr); + } + + id = mi_new_attt_id(mi); + + memmove(Add2Ptr(attr, asize), attr, tail); + memset(attr, 0, asize); + + attr->type = type; + attr->size = cpu_to_le32(asize); + attr->name_len = name_len; + attr->name_off = cpu_to_le16(name_off); + attr->id = id; + + memmove(Add2Ptr(attr, name_off), name, name_len * sizeof(short)); + rec->used = cpu_to_le32(used + asize); + + mi->dirty = true; + + return attr; +} + +/* + * mi_remove_attr + * + * removes the attribute from record + * NOTE: The source attr will point to next attribute + */ +bool mi_remove_attr(struct mft_inode *mi, struct ATTRIB *attr) +{ + struct MFT_REC *rec = mi->mrec; + u32 aoff = PtrOffset(rec, attr); + u32 used = le32_to_cpu(rec->used); + u32 asize = le32_to_cpu(attr->size); + + if (aoff + asize > used) + return false; + + used -= asize; + memmove(attr, Add2Ptr(attr, asize), used - aoff); + rec->used = cpu_to_le32(used); + mi->dirty = true; + + return true; +} + +/* bytes = "new attribute size" - "old attribute size" */ +bool mi_resize_attr(struct mft_inode *mi, struct ATTRIB *attr, int bytes) +{ + struct MFT_REC *rec = mi->mrec; + u32 aoff = PtrOffset(rec, attr); + u32 total, used = le32_to_cpu(rec->used); + u32 nsize, asize = le32_to_cpu(attr->size); + u32 rsize = le32_to_cpu(attr->res.data_size); + int tail = (int)(used - aoff - asize); + int dsize; + char *next; + + if (tail < 0 || aoff >= used) + return false; + + if (!bytes) + return true; + + total = le32_to_cpu(rec->total); + next = Add2Ptr(attr, asize); + + if (bytes > 0) { + dsize = QuadAlign(bytes); + if (used + dsize > total) + return false; + nsize = asize + dsize; + // move tail + memmove(next + dsize, next, tail); + memset(next, 0, dsize); + used += dsize; + rsize += dsize; + } else { + dsize = QuadAlign(-bytes); + if (dsize > asize) + return false; + nsize = asize - dsize; + memmove(next - dsize, next, tail); + used -= dsize; + rsize -= dsize; + } + + rec->used = cpu_to_le32(used); + attr->size = cpu_to_le32(nsize); + if (!attr->non_res) + attr->res.data_size = cpu_to_le32(rsize); + mi->dirty = true; + + return true; +} + +int mi_pack_runs(struct mft_inode *mi, struct ATTRIB *attr, + struct runs_tree *run, CLST len) +{ + int err = 0; + struct ntfs_sb_info *sbi = mi->sbi; + u32 new_run_size; + CLST plen; + struct MFT_REC *rec = mi->mrec; + CLST svcn = le64_to_cpu(attr->nres.svcn); + u32 used = le32_to_cpu(rec->used); + u32 aoff = PtrOffset(rec, attr); + u32 asize = le32_to_cpu(attr->size); + char *next = Add2Ptr(attr, asize); + u16 run_off = le16_to_cpu(attr->nres.run_off); + u32 run_size = asize - run_off; + u32 tail = used - aoff - asize; + u32 dsize = sbi->record_size - used; + + /* Make a maximum gap in current record */ + memmove(next + dsize, next, tail); + + /* Pack as much as possible */ + err = run_pack(run, svcn, len, Add2Ptr(attr, run_off), run_size + dsize, + &plen); + if (err < 0) { + memmove(next, next + dsize, tail); + return err; + } + + new_run_size = QuadAlign(err); + + memmove(next + new_run_size - run_size, next + dsize, tail); + + attr->size = cpu_to_le32(asize + new_run_size - run_size); + attr->nres.evcn = cpu_to_le64(svcn + plen - 1); + rec->used = cpu_to_le32(used + new_run_size - run_size); + mi->dirty = true; + + return 0; +} diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c new file mode 100644 index 000000000000..5cdf6efe67e0 --- /dev/null +++ b/fs/ntfs3/run.c @@ -0,0 +1,1111 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * TODO: try to use extents tree (instead of array) + */ + +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +/* runs_tree is a continues memory. Try to avoid big size */ +#define NTFS3_RUN_MAX_BYTES 0x10000 + +struct ntfs_run { + CLST vcn; /* virtual cluster number */ + CLST len; /* length in clusters */ + CLST lcn; /* logical cluster number */ +}; + +/* + * run_lookup + * + * Lookup the index of a MCB entry that is first <= vcn. + * case of success it will return non-zero value and set + * 'index' parameter to index of entry been found. + * case of entry missing from list 'index' will be set to + * point to insertion position for the entry question. + */ +bool run_lookup(const struct runs_tree *run, CLST vcn, size_t *index) +{ + size_t min_idx, max_idx, mid_idx; + struct ntfs_run *r; + + if (!run->count) { + *index = 0; + return false; + } + + min_idx = 0; + max_idx = run->count - 1; + + /* Check boundary cases specially, 'cause they cover the often requests */ + r = run->runs; + if (vcn < r->vcn) { + *index = 0; + return false; + } + + if (vcn < r->vcn + r->len) { + *index = 0; + return true; + } + + r += max_idx; + if (vcn >= r->vcn + r->len) { + *index = run->count; + return false; + } + + if (vcn >= r->vcn) { + *index = max_idx; + return true; + } + + do { + mid_idx = min_idx + ((max_idx - min_idx) >> 1); + r = run->runs + mid_idx; + + if (vcn < r->vcn) { + max_idx = mid_idx - 1; + if (!mid_idx) + break; + } else if (vcn >= r->vcn + r->len) { + min_idx = mid_idx + 1; + } else { + *index = mid_idx; + return true; + } + } while (min_idx <= max_idx); + + *index = max_idx + 1; + return false; +} + +/* + * run_consolidate + * + * consolidate runs starting from a given one. + */ +static void run_consolidate(struct runs_tree *run, size_t index) +{ + size_t i; + struct ntfs_run *r = run->runs + index; + + while (index + 1 < run->count) { + /* + * I should merge current run with next + * if start of the next run lies inside one being tested. + */ + struct ntfs_run *n = r + 1; + CLST end = r->vcn + r->len; + CLST dl; + + /* Stop if runs are not aligned one to another. */ + if (n->vcn > end) + break; + + dl = end - n->vcn; + + /* + * If range at index overlaps with next one + * then I will either adjust it's start position + * or (if completely matches) dust remove one from the list. + */ + if (dl > 0) { + if (n->len <= dl) + goto remove_next_range; + + n->len -= dl; + n->vcn += dl; + if (n->lcn != SPARSE_LCN) + n->lcn += dl; + dl = 0; + } + + /* + * Stop if sparse mode does not match + * both current and next runs. + */ + if ((n->lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) { + index += 1; + r = n; + continue; + } + + /* + * Check if volume block + * of a next run lcn does not match + * last volume block of the current run. + */ + if (n->lcn != SPARSE_LCN && n->lcn != r->lcn + r->len) + break; + + /* + * Next and current are siblings. + * Eat/join. + */ + r->len += n->len - dl; + +remove_next_range: + i = run->count - (index + 1); + if (i > 1) + memmove(n, n + 1, sizeof(*n) * (i - 1)); + + run->count -= 1; + } +} + +/* returns true if range [svcn - evcn] is mapped*/ +bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn) +{ + size_t i; + const struct ntfs_run *r, *end; + CLST next_vcn; + + if (!run_lookup(run, svcn, &i)) + return false; + + end = run->runs + run->count; + r = run->runs + i; + + for (;;) { + next_vcn = r->vcn + r->len; + if (next_vcn > evcn) + return true; + + if (++r >= end) + return false; + + if (r->vcn != next_vcn) + return false; + } +} + +bool run_lookup_entry(const struct runs_tree *run, CLST vcn, CLST *lcn, + CLST *len, size_t *index) +{ + size_t idx; + CLST gap; + struct ntfs_run *r; + + /* Fail immediately if nrun was not touched yet. */ + if (!run->runs) + return false; + + if (!run_lookup(run, vcn, &idx)) + return false; + + r = run->runs + idx; + + if (vcn >= r->vcn + r->len) + return false; + + gap = vcn - r->vcn; + if (r->len <= gap) + return false; + + *lcn = r->lcn == SPARSE_LCN ? SPARSE_LCN : (r->lcn + gap); + + if (len) + *len = r->len - gap; + if (index) + *index = idx; + + return true; +} + +/* + * run_truncate_head + * + * decommit the range before vcn + */ +void run_truncate_head(struct runs_tree *run, CLST vcn) +{ + size_t index; + struct ntfs_run *r; + + if (run_lookup(run, vcn, &index)) { + r = run->runs + index; + + if (vcn > r->vcn) { + CLST dlen = vcn - r->vcn; + + r->vcn = vcn; + r->len -= dlen; + if (r->lcn != SPARSE_LCN) + r->lcn += dlen; + } + + if (!index) + return; + } + r = run->runs; + memmove(r, r + index, sizeof(*r) * (run->count - index)); + + run->count -= index; + + if (!run->count) { + ntfs_vfree(run->runs); + run->runs = NULL; + run->allocated = 0; + } +} + +/* + * run_truncate + * + * decommit the range after vcn + */ +void run_truncate(struct runs_tree *run, CLST vcn) +{ + size_t index; + + /* + * If I hit the range then + * I have to truncate one. + * If range to be truncated is becoming empty + * then it will entirely be removed. + */ + if (run_lookup(run, vcn, &index)) { + struct ntfs_run *r = run->runs + index; + + r->len = vcn - r->vcn; + + if (r->len > 0) + index += 1; + } + + /* + * At this point 'index' is set to + * position that should be thrown away (including index itself) + * Simple one - just set the limit. + */ + run->count = index; + + /* Do not reallocate array 'runs'. Only free if possible */ + if (!index) { + ntfs_vfree(run->runs); + run->runs = NULL; + run->allocated = 0; + } +} + +/* trim head and tail if necessary*/ +void run_truncate_around(struct runs_tree *run, CLST vcn) +{ + run_truncate_head(run, vcn); + + if (run->count >= NTFS3_RUN_MAX_BYTES / sizeof(struct ntfs_run) / 2) + run_truncate(run, (run->runs + (run->count >> 1))->vcn); +} + +/* + * run_add_entry + * + * sets location to known state. + * run to be added may overlap with existing location. + * returns false if of memory + */ +bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len, + bool is_mft) +{ + size_t used, index; + struct ntfs_run *r; + bool inrange; + CLST tail_vcn = 0, tail_len = 0, tail_lcn = 0; + bool should_add_tail = false; + + /* + * Lookup the insertion point. + * + * Execute bsearch for the entry containing + * start position question. + */ + inrange = run_lookup(run, vcn, &index); + + /* + * Shortcut here would be case of + * range not been found but one been added + * continues previous run. + * this case I can directly make use of + * existing range as my start point. + */ + if (!inrange && index > 0) { + struct ntfs_run *t = run->runs + index - 1; + + if (t->vcn + t->len == vcn && + (t->lcn == SPARSE_LCN) == (lcn == SPARSE_LCN) && + (lcn == SPARSE_LCN || lcn == t->lcn + t->len)) { + inrange = true; + index -= 1; + } + } + + /* + * At this point 'index' either points to the range + * containing start position or to the insertion position + * for a new range. + * So first let's check if range I'm probing is here already. + */ + if (!inrange) { +requires_new_range: + /* + * Range was not found. + * Insert at position 'index' + */ + used = run->count * sizeof(struct ntfs_run); + + /* + * Check allocated space. + * If one is not enough to get one more entry + * then it will be reallocated + */ + if (run->allocated < used + sizeof(struct ntfs_run)) { + size_t bytes; + struct ntfs_run *new_ptr; + + /* Use power of 2 for 'bytes'*/ + if (!used) { + bytes = 64; + } else if (used <= 16 * PAGE_SIZE) { + if (is_power_of2(run->allocated)) + bytes = run->allocated << 1; + else + bytes = (size_t)1 + << (2 + blksize_bits(used)); + } else { + bytes = run->allocated + (16 * PAGE_SIZE); + } + + WARN_ON(!is_mft && bytes > NTFS3_RUN_MAX_BYTES); + + new_ptr = ntfs_vmalloc(bytes); + + if (!new_ptr) + return false; + + r = new_ptr + index; + memcpy(new_ptr, run->runs, + index * sizeof(struct ntfs_run)); + memcpy(r + 1, run->runs + index, + sizeof(struct ntfs_run) * (run->count - index)); + + ntfs_vfree(run->runs); + run->runs = new_ptr; + run->allocated = bytes; + + } else { + size_t i = run->count - index; + + r = run->runs + index; + + /* memmove appears to be a bottle neck here... */ + if (i > 0) + memmove(r + 1, r, sizeof(struct ntfs_run) * i); + } + + r->vcn = vcn; + r->lcn = lcn; + r->len = len; + run->count += 1; + } else { + r = run->runs + index; + + /* + * If one of ranges was not allocated + * then I have to split location I just matched. + * and insert current one + * a common case this requires tail to be reinserted + * a recursive call. + */ + if (((lcn == SPARSE_LCN) != (r->lcn == SPARSE_LCN)) || + (lcn != SPARSE_LCN && lcn != r->lcn + (vcn - r->vcn))) { + CLST to_eat = vcn - r->vcn; + CLST Tovcn = to_eat + len; + + should_add_tail = Tovcn < r->len; + + if (should_add_tail) { + tail_lcn = r->lcn == SPARSE_LCN + ? SPARSE_LCN + : (r->lcn + Tovcn); + tail_vcn = r->vcn + Tovcn; + tail_len = r->len - Tovcn; + } + + if (to_eat > 0) { + r->len = to_eat; + inrange = false; + index += 1; + goto requires_new_range; + } + + /* lcn should match one I'm going to add. */ + r->lcn = lcn; + } + + /* + * If existing range fits then I'm done. + * Otherwise extend found one and fall back to range jocode. + */ + if (r->vcn + r->len < vcn + len) + r->len += len - ((r->vcn + r->len) - vcn); + } + + /* + * And normalize it starting from insertion point. + * It's possible that no insertion needed case if + * start point lies within the range of an entry + * that 'index' points to. + */ + if (inrange && index > 0) + index -= 1; + run_consolidate(run, index); + run_consolidate(run, index + 1); + + /* + * a special case + * I have to add extra range a tail. + */ + if (should_add_tail && + !run_add_entry(run, tail_vcn, tail_lcn, tail_len, is_mft)) + return false; + + return true; +} + +/*helper for attr_collapse_range, which is helper for fallocate(collapse_range)*/ +bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len) +{ + size_t index, eat; + struct ntfs_run *r, *e, *eat_start, *eat_end; + CLST end; + + if (WARN_ON(!run_lookup(run, vcn, &index))) + return true; /* should never be here */ + + e = run->runs + run->count; + r = run->runs + index; + end = vcn + len; + + if (vcn > r->vcn) { + if (r->vcn + r->len <= end) { + /* collapse tail of run */ + r->len = vcn - r->vcn; + } else if (r->lcn == SPARSE_LCN) { + /* collapse a middle part of sparsed run */ + r->len -= len; + } else { + /* collapse a middle part of normal run, split */ + if (!run_add_entry(run, vcn, SPARSE_LCN, len, false)) + return false; + return run_collapse_range(run, vcn, len); + } + + r += 1; + } + + eat_start = r; + eat_end = r; + + for (; r < e; r++) { + CLST d; + + if (r->vcn >= end) { + r->vcn -= len; + continue; + } + + if (r->vcn + r->len <= end) { + /* eat this run */ + eat_end = r + 1; + continue; + } + + d = end - r->vcn; + if (r->lcn != SPARSE_LCN) + r->lcn += d; + r->len -= d; + r->vcn -= len - d; + } + + eat = eat_end - eat_start; + memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r)); + run->count -= eat; + + return true; +} + +/* + * run_get_entry + * + * returns index-th mapped region + */ +bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn, + CLST *lcn, CLST *len) +{ + const struct ntfs_run *r; + + if (index >= run->count) + return false; + + r = run->runs + index; + + if (!r->len) + return false; + + if (vcn) + *vcn = r->vcn; + if (lcn) + *lcn = r->lcn; + if (len) + *len = r->len; + return true; +} + +/* + * run_packed_size + * + * calculates the size of packed int64 + */ +#ifdef __BIG_ENDIAN +static inline int run_packed_size(const s64 n) +{ + const u8 *p = (const u8 *)&n + sizeof(n) - 1; + + if (n >= 0) { + if (p[-7] || p[-6] || p[-5] || p[-4]) + p -= 4; + if (p[-3] || p[-2]) + p -= 2; + if (p[-1]) + p -= 1; + if (p[0] & 0x80) + p -= 1; + } else { + if (p[-7] != 0xff || p[-6] != 0xff || p[-5] != 0xff || + p[-4] != 0xff) + p -= 4; + if (p[-3] != 0xff || p[-2] != 0xff) + p -= 2; + if (p[-1] != 0xff) + p -= 1; + if (!(p[0] & 0x80)) + p -= 1; + } + return (const u8 *)&n + sizeof(n) - p; +} + +/* full trusted function. It does not check 'size' for errors */ +static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v) +{ + const u8 *p = (u8 *)&v; + + switch (size) { + case 8: + run_buf[7] = p[0]; + fallthrough; + case 7: + run_buf[6] = p[1]; + fallthrough; + case 6: + run_buf[5] = p[2]; + fallthrough; + case 5: + run_buf[4] = p[3]; + fallthrough; + case 4: + run_buf[3] = p[4]; + fallthrough; + case 3: + run_buf[2] = p[5]; + fallthrough; + case 2: + run_buf[1] = p[6]; + fallthrough; + case 1: + run_buf[0] = p[7]; + } +} + +/* full trusted function. It does not check 'size' for errors */ +static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v) +{ + u8 *p = (u8 *)&v; + + switch (size) { + case 8: + p[0] = run_buf[7]; + fallthrough; + case 7: + p[1] = run_buf[6]; + fallthrough; + case 6: + p[2] = run_buf[5]; + fallthrough; + case 5: + p[3] = run_buf[4]; + fallthrough; + case 4: + p[4] = run_buf[3]; + fallthrough; + case 3: + p[5] = run_buf[2]; + fallthrough; + case 2: + p[6] = run_buf[1]; + fallthrough; + case 1: + p[7] = run_buf[0]; + } + return v; +} + +#else + +static inline int run_packed_size(const s64 n) +{ + const u8 *p = (const u8 *)&n; + + if (n >= 0) { + if (p[7] || p[6] || p[5] || p[4]) + p += 4; + if (p[3] || p[2]) + p += 2; + if (p[1]) + p += 1; + if (p[0] & 0x80) + p += 1; + } else { + if (p[7] != 0xff || p[6] != 0xff || p[5] != 0xff || + p[4] != 0xff) + p += 4; + if (p[3] != 0xff || p[2] != 0xff) + p += 2; + if (p[1] != 0xff) + p += 1; + if (!(p[0] & 0x80)) + p += 1; + } + + return 1 + p - (const u8 *)&n; +} + +/* full trusted function. It does not check 'size' for errors */ +static inline void run_pack_s64(u8 *run_buf, u8 size, s64 v) +{ + const u8 *p = (u8 *)&v; + + /* memcpy( run_buf, &v, size); is it faster? */ + switch (size) { + case 8: + run_buf[7] = p[7]; + fallthrough; + case 7: + run_buf[6] = p[6]; + fallthrough; + case 6: + run_buf[5] = p[5]; + fallthrough; + case 5: + run_buf[4] = p[4]; + fallthrough; + case 4: + run_buf[3] = p[3]; + fallthrough; + case 3: + run_buf[2] = p[2]; + fallthrough; + case 2: + run_buf[1] = p[1]; + fallthrough; + case 1: + run_buf[0] = p[0]; + } +} + +/* full trusted function. It does not check 'size' for errors */ +static inline s64 run_unpack_s64(const u8 *run_buf, u8 size, s64 v) +{ + u8 *p = (u8 *)&v; + + /* memcpy( &v, run_buf, size); is it faster? */ + switch (size) { + case 8: + p[7] = run_buf[7]; + fallthrough; + case 7: + p[6] = run_buf[6]; + fallthrough; + case 6: + p[5] = run_buf[5]; + fallthrough; + case 5: + p[4] = run_buf[4]; + fallthrough; + case 4: + p[3] = run_buf[3]; + fallthrough; + case 3: + p[2] = run_buf[2]; + fallthrough; + case 2: + p[1] = run_buf[1]; + fallthrough; + case 1: + p[0] = run_buf[0]; + } + return v; +} +#endif + +/* + * run_pack + * + * packs runs into buffer + * packed_vcns - how much runs we have packed + * packed_size - how much bytes we have used run_buf + */ +int run_pack(const struct runs_tree *run, CLST svcn, CLST len, u8 *run_buf, + u32 run_buf_size, CLST *packed_vcns) +{ + CLST next_vcn, vcn, lcn; + CLST prev_lcn = 0; + CLST evcn1 = svcn + len; + int packed_size = 0; + size_t i; + bool ok; + s64 dlcn; + int offset_size, size_size, tmp; + + next_vcn = vcn = svcn; + + *packed_vcns = 0; + + if (!len) + goto out; + + ok = run_lookup_entry(run, vcn, &lcn, &len, &i); + + if (!ok) + goto error; + + if (next_vcn != vcn) + goto error; + + for (;;) { + next_vcn = vcn + len; + if (next_vcn > evcn1) + len = evcn1 - vcn; + + /* how much bytes required to pack len */ + size_size = run_packed_size(len); + + /* offset_size - how much bytes is packed dlcn */ + if (lcn == SPARSE_LCN) { + offset_size = 0; + dlcn = 0; + } else { + /* NOTE: lcn can be less than prev_lcn! */ + dlcn = (s64)lcn - prev_lcn; + offset_size = run_packed_size(dlcn); + prev_lcn = lcn; + } + + tmp = run_buf_size - packed_size - 2 - offset_size; + if (tmp <= 0) + goto out; + + /* can we store this entire run */ + if (tmp < size_size) + goto out; + + if (run_buf) { + /* pack run header */ + run_buf[0] = ((u8)(size_size | (offset_size << 4))); + run_buf += 1; + + /* Pack the length of run */ + run_pack_s64(run_buf, size_size, len); + + run_buf += size_size; + /* Pack the offset from previous lcn */ + run_pack_s64(run_buf, offset_size, dlcn); + run_buf += offset_size; + } + + packed_size += 1 + offset_size + size_size; + *packed_vcns += len; + + if (packed_size + 1 >= run_buf_size || next_vcn >= evcn1) + goto out; + + ok = run_get_entry(run, ++i, &vcn, &lcn, &len); + if (!ok) + goto error; + + if (next_vcn != vcn) + goto error; + } + +out: + /* Store last zero */ + if (run_buf) + run_buf[0] = 0; + + return packed_size + 1; + +error: + return -EOPNOTSUPP; +} + +/* + * run_unpack + * + * unpacks packed runs from "run_buf" + * returns error, if negative, or real used bytes + */ +int run_unpack(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, + u32 run_buf_size) +{ + u64 prev_lcn, vcn64, lcn, next_vcn; + const u8 *run_last, *run_0; + bool is_mft = ino == MFT_REC_MFT; + + /* Check for empty */ + if (evcn + 1 == svcn) + return 0; + + if (evcn < svcn) + return -EINVAL; + + run_0 = run_buf; + run_last = run_buf + run_buf_size; + prev_lcn = 0; + vcn64 = svcn; + + /* Read all runs the chain */ + /* size_size - how much bytes is packed len */ + while (run_buf < run_last) { + /* size_size - how much bytes is packed len */ + u8 size_size = *run_buf & 0xF; + /* offset_size - how much bytes is packed dlcn */ + u8 offset_size = *run_buf++ >> 4; + u64 len; + + if (!size_size) + break; + + /* + * Unpack runs. + * NOTE: runs are stored little endian order + * "len" is unsigned value, "dlcn" is signed + * Large positive number requires to store 5 bytes + * e.g.: 05 FF 7E FF FF 00 00 00 + */ + if (size_size > 8) + return -EINVAL; + + len = run_unpack_s64(run_buf, size_size, 0); + /* skip size_size */ + run_buf += size_size; + + if (!len) + return -EINVAL; + + if (!offset_size) + lcn = SPARSE_LCN64; + else if (offset_size <= 8) { + s64 dlcn; + + /* initial value of dlcn is -1 or 0 */ + dlcn = (run_buf[offset_size - 1] & 0x80) ? (s64)-1 : 0; + dlcn = run_unpack_s64(run_buf, offset_size, dlcn); + /* skip offset_size */ + run_buf += offset_size; + + if (!dlcn) + return -EINVAL; + lcn = prev_lcn + dlcn; + prev_lcn = lcn; + } else + return -EINVAL; + + next_vcn = vcn64 + len; + /* check boundary */ + if (next_vcn > evcn + 1) + return -EINVAL; + +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + if (next_vcn > 0x100000000ull || (lcn + len) > 0x100000000ull) { + ntfs_err( + sbi->sb, + "This driver is compiled whitout CONFIG_NTFS3_64BIT_CLUSTER (like windows driver).\n" + "Volume contains 64 bits run: vcn %llx, lcn %llx, len %llx.\n" + "Activate CONFIG_NTFS3_64BIT_CLUSTER to process this case", + vcn64, lcn, len); + return -EOPNOTSUPP; + } +#endif + if (lcn != SPARSE_LCN64 && lcn + len > sbi->used.bitmap.nbits) { + /* lcn range is out of volume */ + return -EINVAL; + } + + if (!run) + ; /* called from check_attr(fslog.c) to check run */ + else if (run == RUN_DEALLOCATE) { + /* called from ni_delete_all to free clusters without storing in run */ + if (lcn != SPARSE_LCN64) + mark_as_free_ex(sbi, lcn, len, true); + } else if (vcn64 >= vcn) { + if (!run_add_entry(run, vcn64, lcn, len, is_mft)) + return -ENOMEM; + } else if (next_vcn > vcn) { + u64 dlen = vcn - vcn64; + + if (!run_add_entry(run, vcn, lcn + dlen, len - dlen, + is_mft)) + return -ENOMEM; + } + + vcn64 = next_vcn; + } + + if (vcn64 != evcn + 1) { + /* not expected length of unpacked runs */ + return -EINVAL; + } + + return run_buf - run_0; +} + +#ifdef NTFS3_CHECK_FREE_CLST +/* + * run_unpack_ex + * + * unpacks packed runs from "run_buf" + * checks unpacked runs to be used in bitmap + * returns error, if negative, or real used bytes + */ +int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino, + CLST svcn, CLST evcn, CLST vcn, const u8 *run_buf, + u32 run_buf_size) +{ + int ret, err; + CLST next_vcn, lcn, len; + size_t index; + bool ok; + struct wnd_bitmap *wnd; + + ret = run_unpack(run, sbi, ino, svcn, evcn, vcn, run_buf, run_buf_size); + if (ret <= 0) + return ret; + + if (!sbi->used.bitmap.sb || !run || run == RUN_DEALLOCATE) + return ret; + + if (ino == MFT_REC_BADCLUST) + return ret; + + next_vcn = vcn = svcn; + wnd = &sbi->used.bitmap; + + for (ok = run_lookup_entry(run, vcn, &lcn, &len, &index); + next_vcn <= evcn; + ok = run_get_entry(run, ++index, &vcn, &lcn, &len)) { + if (!ok || next_vcn != vcn) + return -EINVAL; + + next_vcn = vcn + len; + + if (lcn == SPARSE_LCN) + continue; + + if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) + continue; + + down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS); + /* Check for free blocks */ + ok = wnd_is_used(wnd, lcn, len); + up_read(&wnd->rw_lock); + if (ok) + continue; + + /* Looks like volume is corrupted */ + ntfs_set_state(sbi, NTFS_DIRTY_ERROR); + + if (down_write_trylock(&wnd->rw_lock)) { + /* mark all zero bits as used in range [lcn, lcn+len) */ + CLST i, lcn_f = 0, len_f = 0; + + err = 0; + for (i = 0; i < len; i++) { + if (wnd_is_free(wnd, lcn + i, 1)) { + if (!len_f) + lcn_f = lcn + i; + len_f += 1; + } else if (len_f) { + err = wnd_set_used(wnd, lcn_f, len_f); + len_f = 0; + if (err) + break; + } + } + + if (len_f) + err = wnd_set_used(wnd, lcn_f, len_f); + + up_write(&wnd->rw_lock); + if (err) + return err; + } + } + + return ret; +} +#endif + +/* + * run_get_highest_vcn + * + * returns the highest vcn from a mapping pairs array + * it used while replaying log file + */ +int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn) +{ + u64 vcn64 = vcn; + u8 size_size; + + while ((size_size = *run_buf & 0xF)) { + u8 offset_size = *run_buf++ >> 4; + u64 len; + + if (size_size > 8 || offset_size > 8) + return -EINVAL; + + len = run_unpack_s64(run_buf, size_size, 0); + if (!len) + return -EINVAL; + + run_buf += size_size + offset_size; + vcn64 += len; + +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + if (vcn64 > 0x100000000ull) + return -EINVAL; +#endif + } + + *highest_vcn = vcn64 - 1; + return 0; +} diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c new file mode 100644 index 000000000000..961782c76924 --- /dev/null +++ b/fs/ntfs3/super.c @@ -0,0 +1,1501 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + * + * terminology + * + * cluster - allocation unit - 512,1K,2K,4K,...,2M + * vcn - virtual cluster number - offset inside the file in clusters + * vbo - virtual byte offset - offset inside the file in bytes + * lcn - logical cluster number - 0 based cluster in clusters heap + * lbo - logical byte offset - absolute position inside volume + * run - maps vcn to lcn - stored in attributes in packed form + * attr - attribute segment - std/name/data etc records inside MFT + * mi - mft inode - one MFT record(usually 1024 bytes or 4K), consists of attributes + * ni - ntfs inode - extends linux inode. consists of one or more mft inodes + * index - unit inside directory - 2K, 4K, <=page size, does not depend on cluster size + * + * TODO: Implement + * https://docs.microsoft.com/en-us/windows/wsl/file-permissions + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" +#ifdef CONFIG_NTFS3_LZX_XPRESS +#include "lib/lib.h" +#endif + +#ifdef CONFIG_PRINTK +/* + * Trace warnings/notices/errors + * Thanks Joe Perches for implementation + */ +void ntfs_printk(const struct super_block *sb, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + int level; + struct ntfs_sb_info *sbi = sb->s_fs_info; + + /*should we use different ratelimits for warnings/notices/errors? */ + if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3")) + return; + + va_start(args, fmt); + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); + vaf.va = &args; + printk("%c%cntfs3: %s: %pV\n", KERN_SOH_ASCII, level, sb->s_id, &vaf); + + va_end(args); +} + +static char s_name_buf[512]; +static atomic_t s_name_buf_cnt = ATOMIC_INIT(1); // 1 means 'free s_name_buf' + +/* print warnings/notices/errors about inode using name or inode number */ +void ntfs_inode_printk(struct inode *inode, const char *fmt, ...) +{ + struct super_block *sb = inode->i_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + char *name; + va_list args; + struct va_format vaf; + int level; + + if (!___ratelimit(&sbi->msg_ratelimit, "ntfs3")) + return; + + if (atomic_dec_and_test(&s_name_buf_cnt)) { + /* use static allocated buffer */ + name = s_name_buf; + } else { + name = kmalloc(sizeof(s_name_buf), GFP_NOFS); + } + + if (name) { + struct dentry *dentry = d_find_alias(inode); + const u32 name_len = ARRAY_SIZE(s_name_buf) - 1; + + if (dentry) { + spin_lock(&dentry->d_lock); + snprintf(name, name_len, "%s", dentry->d_name.name); + spin_unlock(&dentry->d_lock); + dput(dentry); + name[name_len] = 0; /* to be sure*/ + } else { + name[0] = 0; + } + } + + va_start(args, fmt); + + level = printk_get_level(fmt); + vaf.fmt = printk_skip_level(fmt); + vaf.va = &args; + + printk("%c%cntfs3: %s: ino=%lx, \"%s\" %pV\n", KERN_SOH_ASCII, level, + sb->s_id, inode->i_ino, name ? name : "", &vaf); + + va_end(args); + + atomic_inc(&s_name_buf_cnt); + if (name != s_name_buf) + kfree(name); +} +#endif + +/* + * Shared memory struct. + * + * on-disk ntfs's upcase table is created by ntfs formater + * 'upcase' table is 128K bytes of memory + * we should read it into memory when mounting + * Several ntfs volumes likely use the same 'upcase' table + * It is good idea to share in-memory 'upcase' table between different volumes + * Unfortunately winxp/vista/win7 use different upcase tables + */ +static DEFINE_SPINLOCK(s_shared_lock); + +static struct { + void *ptr; + u32 len; + int cnt; +} s_shared[8]; + +/* + * ntfs_set_shared + * + * Returns 'ptr' if pointer was saved in shared memory + * Returns NULL if pointer was not shared + */ +void *ntfs_set_shared(void *ptr, u32 bytes) +{ + void *ret = NULL; + int i, j = -1; + + spin_lock(&s_shared_lock); + for (i = 0; i < ARRAY_SIZE(s_shared); i++) { + if (!s_shared[i].cnt) { + j = i; + } else if (bytes == s_shared[i].len && + !memcmp(s_shared[i].ptr, ptr, bytes)) { + s_shared[i].cnt += 1; + ret = s_shared[i].ptr; + break; + } + } + + if (!ret && j != -1) { + s_shared[j].ptr = ptr; + s_shared[j].len = bytes; + s_shared[j].cnt = 1; + ret = ptr; + } + spin_unlock(&s_shared_lock); + + return ret; +} + +/* + * ntfs_put_shared + * + * Returns 'ptr' if pointer is not shared anymore + * Returns NULL if pointer is still shared + */ +void *ntfs_put_shared(void *ptr) +{ + void *ret = ptr; + int i; + + spin_lock(&s_shared_lock); + for (i = 0; i < ARRAY_SIZE(s_shared); i++) { + if (s_shared[i].cnt && s_shared[i].ptr == ptr) { + if (--s_shared[i].cnt) + ret = NULL; + break; + } + } + spin_unlock(&s_shared_lock); + + return ret; +} + +static inline void clear_mount_options(struct ntfs_mount_options *options) +{ + unload_nls(options->nls); +} + +enum Opt { + Opt_uid, + Opt_gid, + Opt_umask, + Opt_dmask, + Opt_fmask, + Opt_immutable, + Opt_discard, + Opt_force, + Opt_sparse, + Opt_nohidden, + Opt_showmeta, + Opt_acl, + Opt_noatime, + Opt_nls, + Opt_prealloc, + Opt_no_acs_rules, + Opt_err, +}; + +static const match_table_t ntfs_tokens = { + { Opt_uid, "uid=%u" }, + { Opt_gid, "gid=%u" }, + { Opt_umask, "umask=%o" }, + { Opt_dmask, "dmask=%o" }, + { Opt_fmask, "fmask=%o" }, + { Opt_immutable, "sys_immutable" }, + { Opt_discard, "discard" }, + { Opt_force, "force" }, + { Opt_sparse, "sparse" }, + { Opt_nohidden, "nohidden" }, + { Opt_acl, "acl" }, + { Opt_noatime, "noatime" }, + { Opt_showmeta, "showmeta" }, + { Opt_nls, "nls=%s" }, + { Opt_prealloc, "prealloc" }, + { Opt_no_acs_rules, "no_acs_rules" }, + { Opt_err, NULL }, +}; + +static noinline int ntfs_parse_options(struct super_block *sb, char *options, + int silent, + struct ntfs_mount_options *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char nls_name[30]; + struct nls_table *nls; + + opts->fs_uid = current_uid(); + opts->fs_gid = current_gid(); + opts->fs_fmask_inv = opts->fs_dmask_inv = ~current_umask(); + nls_name[0] = 0; + + if (!options) + goto out; + + while ((p = strsep(&options, ","))) { + int token; + + if (!*p) + continue; + + token = match_token(p, ntfs_tokens, args); + switch (token) { + case Opt_immutable: + opts->sys_immutable = 1; + break; + case Opt_uid: + if (match_int(&args[0], &option)) + return -EINVAL; + opts->fs_uid = make_kuid(current_user_ns(), option); + if (!uid_valid(opts->fs_uid)) + return -EINVAL; + opts->uid = 1; + break; + case Opt_gid: + if (match_int(&args[0], &option)) + return -EINVAL; + opts->fs_gid = make_kgid(current_user_ns(), option); + if (!gid_valid(opts->fs_gid)) + return -EINVAL; + opts->gid = 1; + break; + case Opt_umask: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->fs_fmask_inv = opts->fs_dmask_inv = ~option; + opts->fmask = opts->dmask = 1; + break; + case Opt_dmask: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->fs_dmask_inv = ~option; + opts->dmask = 1; + break; + case Opt_fmask: + if (match_octal(&args[0], &option)) + return -EINVAL; + opts->fs_fmask_inv = ~option; + opts->fmask = 1; + break; + case Opt_discard: + opts->discard = 1; + break; + case Opt_force: + opts->force = 1; + break; + case Opt_sparse: + opts->sparse = 1; + break; + case Opt_nohidden: + opts->nohidden = 1; + break; + case Opt_acl: +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + sb->s_flags |= SB_POSIXACL; + break; +#else + ntfs_err(sb, "support for ACL not compiled in!"); + return -EINVAL; +#endif + case Opt_noatime: + sb->s_flags |= SB_NOATIME; + break; + case Opt_showmeta: + opts->showmeta = 1; + break; + case Opt_nls: + match_strlcpy(nls_name, &args[0], sizeof(nls_name)); + break; + case Opt_prealloc: + opts->prealloc = 1; + break; + case Opt_no_acs_rules: + opts->no_acs_rules = 1; + break; + default: + if (!silent) + ntfs_err( + sb, + "Unrecognized mount option \"%s\" or missing value", + p); + //return -EINVAL; + } + } + +out: + if (!strcmp(nls_name[0] ? nls_name : CONFIG_NLS_DEFAULT, "utf8")) { + /* For UTF-8 use utf16s_to_utf8s/utf8s_to_utf16s instead of nls */ + nls = NULL; + } else if (nls_name[0]) { + nls = load_nls(nls_name); + if (!nls) { + ntfs_err(sb, "failed to load \"%s\"", nls_name); + return -EINVAL; + } + } else { + nls = load_nls_default(); + if (!nls) { + ntfs_err(sb, "failed to load default nls"); + return -EINVAL; + } + } + opts->nls = nls; + + return 0; +} + +static int ntfs_remount(struct super_block *sb, int *flags, char *data) +{ + int err, ro_rw; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_mount_options old_opts; + char *orig_data = kstrdup(data, GFP_KERNEL); + + if (data && !orig_data) + return -ENOMEM; + + /* Store original options */ + memcpy(&old_opts, &sbi->options, sizeof(old_opts)); + clear_mount_options(&sbi->options); + memset(&sbi->options, 0, sizeof(sbi->options)); + + err = ntfs_parse_options(sb, data, 0, &sbi->options); + if (err) + goto restore_opts; + + ro_rw = sb_rdonly(sb) && !(*flags & SB_RDONLY); + if (ro_rw && (sbi->flags & NTFS_FLAGS_NEED_REPLAY)) { + ntfs_warn( + sb, + "Couldn't remount rw because journal is not replayed. Please umount/remount instead\n"); + err = -EINVAL; + goto restore_opts; + } + + sync_filesystem(sb); + + if (ro_rw && (sbi->volume.flags & VOLUME_FLAG_DIRTY) && + !sbi->options.force) { + ntfs_warn(sb, "volume is dirty and \"force\" flag is not set!"); + err = -EINVAL; + goto restore_opts; + } + + clear_mount_options(&old_opts); + + *flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME) | + SB_NODIRATIME | SB_NOATIME; + ntfs_info(sb, "re-mounted. Opts: %s", orig_data); + err = 0; + goto out; + +restore_opts: + clear_mount_options(&sbi->options); + memcpy(&sbi->options, &old_opts, sizeof(old_opts)); + +out: + kfree(orig_data); + return err; +} + +static struct kmem_cache *ntfs_inode_cachep; + +static struct inode *ntfs_alloc_inode(struct super_block *sb) +{ + struct ntfs_inode *ni = kmem_cache_alloc(ntfs_inode_cachep, GFP_NOFS); + + if (!ni) + return NULL; + + memset(ni, 0, offsetof(struct ntfs_inode, vfs_inode)); + + mutex_init(&ni->ni_lock); + + return &ni->vfs_inode; +} + +static void ntfs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + struct ntfs_inode *ni = ntfs_i(inode); + + mutex_destroy(&ni->ni_lock); + + kmem_cache_free(ntfs_inode_cachep, ni); +} + +static void ntfs_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, ntfs_i_callback); +} + +static void init_once(void *foo) +{ + struct ntfs_inode *ni = foo; + + inode_init_once(&ni->vfs_inode); +} + +/* noinline to reduce binary size*/ +static noinline void put_ntfs(struct ntfs_sb_info *sbi) +{ + ntfs_free(sbi->new_rec); + ntfs_vfree(ntfs_put_shared(sbi->upcase)); + ntfs_free(sbi->def_table); + + wnd_close(&sbi->mft.bitmap); + wnd_close(&sbi->used.bitmap); + + if (sbi->mft.ni) + iput(&sbi->mft.ni->vfs_inode); + + if (sbi->security.ni) + iput(&sbi->security.ni->vfs_inode); + + if (sbi->reparse.ni) + iput(&sbi->reparse.ni->vfs_inode); + + if (sbi->objid.ni) + iput(&sbi->objid.ni->vfs_inode); + + if (sbi->volume.ni) + iput(&sbi->volume.ni->vfs_inode); + + ntfs_update_mftmirr(sbi, 0); + + indx_clear(&sbi->security.index_sii); + indx_clear(&sbi->security.index_sdh); + indx_clear(&sbi->reparse.index_r); + indx_clear(&sbi->objid.index_o); + ntfs_free(sbi->compress.lznt); +#ifdef CONFIG_NTFS3_LZX_XPRESS + xpress_free_decompressor(sbi->compress.xpress); + lzx_free_decompressor(sbi->compress.lzx); +#endif + clear_mount_options(&sbi->options); + + ntfs_free(sbi); +} + +static void ntfs_put_super(struct super_block *sb) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + + /*mark rw ntfs as clear, if possible*/ + ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); + + put_ntfs(sbi); + + sync_blockdev(sb->s_bdev); +} + +static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf) +{ + struct super_block *sb = dentry->d_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct wnd_bitmap *wnd = &sbi->used.bitmap; + + buf->f_type = sb->s_magic; + buf->f_bsize = sbi->cluster_size; + buf->f_blocks = wnd->nbits; + + buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd); + buf->f_fsid.val[0] = sbi->volume.ser_num; + buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32); + buf->f_namelen = NTFS_NAME_LEN; + + return 0; +} + +static int ntfs_show_options(struct seq_file *m, struct dentry *root) +{ + struct super_block *sb = root->d_sb; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_mount_options *opts = &sbi->options; + + if (opts->uid) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, opts->fs_uid)); + if (opts->gid) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, opts->fs_gid)); + if (opts->fmask) + seq_printf(m, ",fmask=%04o", ~opts->fs_fmask_inv); + if (opts->dmask) + seq_printf(m, ",dmask=%04o", ~opts->fs_dmask_inv); + if (opts->nls) + seq_printf(m, ",nls=%s", opts->nls->charset); + else + seq_puts(m, ",nls=utf8"); + if (opts->sys_immutable) + seq_puts(m, ",sys_immutable"); + if (opts->discard) + seq_puts(m, ",discard"); + if (opts->sparse) + seq_puts(m, ",sparse"); + if (opts->showmeta) + seq_puts(m, ",showmeta"); + if (opts->nohidden) + seq_puts(m, ",nohidden"); + if (opts->force) + seq_puts(m, ",force"); + if (opts->no_acs_rules) + seq_puts(m, ",no_acs_rules"); + if (opts->prealloc) + seq_puts(m, ",prealloc"); + if (sb->s_flags & SB_POSIXACL) + seq_puts(m, ",acl"); + if (sb->s_flags & SB_NOATIME) + seq_puts(m, ",noatime"); + + return 0; +} + +/*super_operations::sync_fs*/ +static int ntfs_sync_fs(struct super_block *sb, int wait) +{ + int err = 0, err2; + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct ntfs_inode *ni; + struct inode *inode; + + ni = sbi->security.ni; + if (ni) { + inode = &ni->vfs_inode; + err2 = _ni_write_inode(inode, wait); + if (err2 && !err) + err = err2; + } + + ni = sbi->objid.ni; + if (ni) { + inode = &ni->vfs_inode; + err2 = _ni_write_inode(inode, wait); + if (err2 && !err) + err = err2; + } + + ni = sbi->reparse.ni; + if (ni) { + inode = &ni->vfs_inode; + err2 = _ni_write_inode(inode, wait); + if (err2 && !err) + err = err2; + } + + if (!err) + ntfs_set_state(sbi, NTFS_DIRTY_CLEAR); + + ntfs_update_mftmirr(sbi, wait); + + return err; +} + +static const struct super_operations ntfs_sops = { + .alloc_inode = ntfs_alloc_inode, + .destroy_inode = ntfs_destroy_inode, + .evict_inode = ntfs_evict_inode, + .put_super = ntfs_put_super, + .statfs = ntfs_statfs, + .show_options = ntfs_show_options, + .sync_fs = ntfs_sync_fs, + .remount_fs = ntfs_remount, + .write_inode = ntfs3_write_inode, +}; + +static struct inode *ntfs_export_get_inode(struct super_block *sb, u64 ino, + u32 generation) +{ + struct MFT_REF ref; + struct inode *inode; + + ref.low = cpu_to_le32(ino); +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + ref.high = cpu_to_le16(ino >> 32); +#else + ref.high = 0; +#endif + ref.seq = cpu_to_le16(generation); + + inode = ntfs_iget5(sb, &ref, NULL); + if (!IS_ERR(inode) && is_bad_inode(inode)) { + iput(inode); + inode = ERR_PTR(-ESTALE); + } + + return inode; +} + +static struct dentry *ntfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_dentry(sb, fid, fh_len, fh_type, + ntfs_export_get_inode); +} + +static struct dentry *ntfs_fh_to_parent(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + return generic_fh_to_parent(sb, fid, fh_len, fh_type, + ntfs_export_get_inode); +} + +/* TODO: == ntfs_sync_inode */ +static int ntfs_nfs_commit_metadata(struct inode *inode) +{ + return _ni_write_inode(inode, 1); +} + +static const struct export_operations ntfs_export_ops = { + .fh_to_dentry = ntfs_fh_to_dentry, + .fh_to_parent = ntfs_fh_to_parent, + .get_parent = ntfs3_get_parent, + .commit_metadata = ntfs_nfs_commit_metadata, +}; + +/* Returns Gb,Mb to print with "%u.%02u Gb" */ +static u32 format_size_gb(const u64 bytes, u32 *mb) +{ + /* Do simple right 30 bit shift of 64 bit value */ + u64 kbytes = bytes >> 10; + u32 kbytes32 = kbytes; + + *mb = (100 * (kbytes32 & 0xfffff) + 0x7ffff) >> 20; + if (*mb >= 100) + *mb = 99; + + return (kbytes32 >> 20) | (((u32)(kbytes >> 32)) << 12); +} + +static u32 true_sectors_per_clst(const struct NTFS_BOOT *boot) +{ + return boot->sectors_per_clusters <= 0x80 + ? boot->sectors_per_clusters + : (1u << (0 - boot->sectors_per_clusters)); +} + +/* inits internal info from on-disk boot sector*/ +static int ntfs_init_from_boot(struct super_block *sb, u32 sector_size, + u64 dev_size) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + int err; + u32 mb, gb, boot_sector_size, sct_per_clst, record_size; + u64 sectors, clusters, fs_size, mlcn, mlcn2; + struct NTFS_BOOT *boot; + struct buffer_head *bh; + struct MFT_REC *rec; + u16 fn, ao; + + sbi->volume.blocks = dev_size >> PAGE_SHIFT; + + bh = ntfs_bread(sb, 0); + if (!bh) + return -EIO; + + err = -EINVAL; + boot = (struct NTFS_BOOT *)bh->b_data; + + if (memcmp(boot->system_id, "NTFS ", sizeof("NTFS ") - 1)) + goto out; + + /* 0x55AA is not mandaroty. Thanks Maxim Suhanov*/ + /*if (0x55 != boot->boot_magic[0] || 0xAA != boot->boot_magic[1]) + * goto out; + */ + + boot_sector_size = (u32)boot->bytes_per_sector[1] << 8; + if (boot->bytes_per_sector[0] || boot_sector_size < SECTOR_SIZE || + !is_power_of2(boot_sector_size)) { + goto out; + } + + /* cluster size: 512, 1K, 2K, 4K, ... 2M */ + sct_per_clst = true_sectors_per_clst(boot); + if (!is_power_of2(sct_per_clst)) + goto out; + + mlcn = le64_to_cpu(boot->mft_clst); + mlcn2 = le64_to_cpu(boot->mft2_clst); + sectors = le64_to_cpu(boot->sectors_per_volume); + + if (mlcn * sct_per_clst >= sectors) + goto out; + + if (mlcn2 * sct_per_clst >= sectors) + goto out; + + /* Check MFT record size */ + if ((boot->record_size < 0 && + SECTOR_SIZE > (2U << (-boot->record_size))) || + (boot->record_size >= 0 && !is_power_of2(boot->record_size))) { + goto out; + } + + /* Check index record size */ + if ((boot->index_size < 0 && + SECTOR_SIZE > (2U << (-boot->index_size))) || + (boot->index_size >= 0 && !is_power_of2(boot->index_size))) { + goto out; + } + + sbi->sector_size = boot_sector_size; + sbi->sector_bits = blksize_bits(boot_sector_size); + fs_size = (sectors + 1) << sbi->sector_bits; + + gb = format_size_gb(fs_size, &mb); + + /* + * - Volume formatted and mounted with the same sector size + * - Volume formatted 4K and mounted as 512 + * - Volume formatted 512 and mounted as 4K + */ + if (sbi->sector_size != sector_size) { + ntfs_warn(sb, + "Different NTFS' sector size and media sector size"); + dev_size += sector_size - 1; + } + + sbi->cluster_size = boot_sector_size * sct_per_clst; + sbi->cluster_bits = blksize_bits(sbi->cluster_size); + + sbi->mft.lbo = mlcn << sbi->cluster_bits; + sbi->mft.lbo2 = mlcn2 << sbi->cluster_bits; + + if (sbi->cluster_size < sbi->sector_size) + goto out; + + sbi->cluster_mask = sbi->cluster_size - 1; + sbi->cluster_mask_inv = ~(u64)sbi->cluster_mask; + sbi->record_size = record_size = boot->record_size < 0 + ? 1 << (-boot->record_size) + : (u32)boot->record_size + << sbi->cluster_bits; + + if (record_size > MAXIMUM_BYTES_PER_MFT) + goto out; + + sbi->record_bits = blksize_bits(record_size); + sbi->attr_size_tr = (5 * record_size >> 4); // ~320 bytes + + sbi->max_bytes_per_attr = + record_size - QuadAlign(MFTRECORD_FIXUP_OFFSET_1) - + QuadAlign(((record_size >> SECTOR_SHIFT) * sizeof(short))) - + QuadAlign(sizeof(enum ATTR_TYPE)); + + sbi->index_size = boot->index_size < 0 + ? 1u << (-boot->index_size) + : (u32)boot->index_size << sbi->cluster_bits; + + sbi->volume.ser_num = le64_to_cpu(boot->serial_num); + sbi->volume.size = sectors << sbi->sector_bits; + + /* warning if RAW volume */ + if (dev_size < fs_size) { + u32 mb0, gb0; + + gb0 = format_size_gb(dev_size, &mb0); + ntfs_warn( + sb, + "RAW NTFS volume: Filesystem size %u.%02u Gb > volume size %u.%02u Gb. Mount in read-only", + gb, mb, gb0, mb0); + sb->s_flags |= SB_RDONLY; + } + + clusters = sbi->volume.size >> sbi->cluster_bits; +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + /* 32 bits per cluster */ + if (clusters >> 32) { + ntfs_notice( + sb, + "NTFS %u.%02u Gb is too big to use 32 bits per cluster", + gb, mb); + goto out; + } +#elif BITS_PER_LONG < 64 +#error "CONFIG_NTFS3_64BIT_CLUSTER incompatible in 32 bit OS" +#endif + + sbi->used.bitmap.nbits = clusters; + + rec = ntfs_zalloc(record_size); + if (!rec) { + err = -ENOMEM; + goto out; + } + + sbi->new_rec = rec; + rec->rhdr.sign = NTFS_FILE_SIGNATURE; + rec->rhdr.fix_off = cpu_to_le16(MFTRECORD_FIXUP_OFFSET_1); + fn = (sbi->record_size >> SECTOR_SHIFT) + 1; + rec->rhdr.fix_num = cpu_to_le16(fn); + ao = QuadAlign(MFTRECORD_FIXUP_OFFSET_1 + sizeof(short) * fn); + rec->attr_off = cpu_to_le16(ao); + rec->used = cpu_to_le32(ao + QuadAlign(sizeof(enum ATTR_TYPE))); + rec->total = cpu_to_le32(sbi->record_size); + ((struct ATTRIB *)Add2Ptr(rec, ao))->type = ATTR_END; + + if (sbi->cluster_size < PAGE_SIZE) + sb_set_blocksize(sb, sbi->cluster_size); + + sbi->block_mask = sb->s_blocksize - 1; + sbi->blocks_per_cluster = sbi->cluster_size >> sb->s_blocksize_bits; + sbi->volume.blocks = sbi->volume.size >> sb->s_blocksize_bits; + + /* Maximum size for normal files */ + sbi->maxbytes = (clusters << sbi->cluster_bits) - 1; + +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + if (clusters >= (1ull << (64 - sbi->cluster_bits))) + sbi->maxbytes = -1; + sbi->maxbytes_sparse = -1; +#else + /* Maximum size for sparse file */ + sbi->maxbytes_sparse = (1ull << (sbi->cluster_bits + 32)) - 1; +#endif + + err = 0; + +out: + brelse(bh); + + return err; +} + +/* try to mount*/ +static int ntfs_fill_super(struct super_block *sb, void *data, int silent) +{ + int err; + struct ntfs_sb_info *sbi; + struct block_device *bdev = sb->s_bdev; + struct inode *bd_inode = bdev->bd_inode; + struct request_queue *rq = bdev_get_queue(bdev); + struct inode *inode = NULL; + struct ntfs_inode *ni; + size_t i, tt; + CLST vcn, lcn, len; + struct ATTRIB *attr; + const struct VOLUME_INFO *info; + u32 idx, done, bytes; + struct ATTR_DEF_ENTRY *t; + u16 *upcase = NULL; + u16 *shared; + bool is_ro; + struct MFT_REF ref; + + ref.high = 0; + + sbi = ntfs_zalloc(sizeof(struct ntfs_sb_info)); + if (!sbi) + return -ENOMEM; + + sb->s_fs_info = sbi; + sbi->sb = sb; + sb->s_flags |= SB_NODIRATIME; + sb->s_magic = 0x7366746e; // "ntfs" + sb->s_op = &ntfs_sops; + sb->s_export_op = &ntfs_export_ops; + sb->s_time_gran = NTFS_TIME_GRAN; // 100 nsec + sb->s_xattr = ntfs_xattr_handlers; + + ratelimit_state_init(&sbi->msg_ratelimit, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + + err = ntfs_parse_options(sb, data, silent, &sbi->options); + if (err) + goto out; + + if (!rq || !blk_queue_discard(rq) || !rq->limits.discard_granularity) { + ; + } else { + sbi->discard_granularity = rq->limits.discard_granularity; + sbi->discard_granularity_mask_inv = + ~(u64)(sbi->discard_granularity - 1); + } + + sb_set_blocksize(sb, PAGE_SIZE); + + /* parse boot */ + err = ntfs_init_from_boot(sb, rq ? queue_logical_block_size(rq) : 512, + bd_inode->i_size); + if (err) + goto out; + +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + sb->s_maxbytes = MAX_LFS_FILESIZE; +#else + sb->s_maxbytes = 0xFFFFFFFFull << sbi->cluster_bits; +#endif + + mutex_init(&sbi->compress.mtx_lznt); +#ifdef CONFIG_NTFS3_LZX_XPRESS + mutex_init(&sbi->compress.mtx_xpress); + mutex_init(&sbi->compress.mtx_lzx); +#endif + + /* + * Load $Volume. This should be done before LogFile + * 'cause 'sbi->volume.ni' is used 'ntfs_set_state' + */ + ref.low = cpu_to_le32(MFT_REC_VOL); + ref.seq = cpu_to_le16(MFT_REC_VOL); + inode = ntfs_iget5(sb, &ref, &NAME_VOLUME); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $Volume."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + /* Load and save label (not necessary) */ + attr = ni_find_attr(ni, NULL, NULL, ATTR_LABEL, NULL, 0, NULL, NULL); + + if (!attr) { + /* It is ok if no ATTR_LABEL */ + } else if (!attr->non_res && !is_attr_ext(attr)) { + /* $AttrDef allows labels to be up to 128 symbols */ + err = utf16s_to_utf8s(resident_data(attr), + le32_to_cpu(attr->res.data_size) >> 1, + UTF16_LITTLE_ENDIAN, sbi->volume.label, + sizeof(sbi->volume.label)); + if (err < 0) + sbi->volume.label[0] = 0; + } else { + /* should we break mounting here? */ + //err = -EINVAL; + //goto out; + } + + attr = ni_find_attr(ni, attr, NULL, ATTR_VOL_INFO, NULL, 0, NULL, NULL); + if (!attr || is_attr_ext(attr)) { + err = -EINVAL; + goto out; + } + + info = resident_data_ex(attr, SIZEOF_ATTRIBUTE_VOLUME_INFO); + if (!info) { + err = -EINVAL; + goto out; + } + + sbi->volume.major_ver = info->major_ver; + sbi->volume.minor_ver = info->minor_ver; + sbi->volume.flags = info->flags; + + sbi->volume.ni = ni; + inode = NULL; + + /* Load $MFTMirr to estimate recs_mirr */ + ref.low = cpu_to_le32(MFT_REC_MIRR); + ref.seq = cpu_to_le16(MFT_REC_MIRR); + inode = ntfs_iget5(sb, &ref, &NAME_MIRROR); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $MFTMirr."); + inode = NULL; + goto out; + } + + sbi->mft.recs_mirr = + ntfs_up_cluster(sbi, inode->i_size) >> sbi->record_bits; + + iput(inode); + + /* Load LogFile to replay */ + ref.low = cpu_to_le32(MFT_REC_LOG); + ref.seq = cpu_to_le16(MFT_REC_LOG); + inode = ntfs_iget5(sb, &ref, &NAME_LOGFILE); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load \x24LogFile."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + err = ntfs_loadlog_and_replay(ni, sbi); + if (err) + goto out; + + iput(inode); + inode = NULL; + + is_ro = sb_rdonly(sbi->sb); + + if (sbi->flags & NTFS_FLAGS_NEED_REPLAY) { + if (!is_ro) { + ntfs_warn(sb, + "failed to replay log file. Can't mount rw!"); + err = -EINVAL; + goto out; + } + } else if (sbi->volume.flags & VOLUME_FLAG_DIRTY) { + if (!is_ro && !sbi->options.force) { + ntfs_warn( + sb, + "volume is dirty and \"force\" flag is not set!"); + err = -EINVAL; + goto out; + } + } + + /* Load $MFT */ + ref.low = cpu_to_le32(MFT_REC_MFT); + ref.seq = cpu_to_le16(1); + + inode = ntfs_iget5(sb, &ref, &NAME_MFT); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $MFT."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + sbi->mft.used = ni->i_valid >> sbi->record_bits; + tt = inode->i_size >> sbi->record_bits; + sbi->mft.next_free = MFT_REC_USER; + + err = wnd_init(&sbi->mft.bitmap, sb, tt); + if (err) + goto out; + + err = ni_load_all_mi(ni); + if (err) + goto out; + + sbi->mft.ni = ni; + + /* Load $BadClus */ + ref.low = cpu_to_le32(MFT_REC_BADCLUST); + ref.seq = cpu_to_le16(MFT_REC_BADCLUST); + inode = ntfs_iget5(sb, &ref, &NAME_BADCLUS); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $BadClus."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + for (i = 0; run_get_entry(&ni->file.run, i, &vcn, &lcn, &len); i++) { + if (lcn == SPARSE_LCN) + continue; + + if (!sbi->bad_clusters) + ntfs_notice(sb, "Volume contains bad blocks"); + + sbi->bad_clusters += len; + } + + iput(inode); + + /* Load $Bitmap */ + ref.low = cpu_to_le32(MFT_REC_BITMAP); + ref.seq = cpu_to_le16(MFT_REC_BITMAP); + inode = ntfs_iget5(sb, &ref, &NAME_BITMAP); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $Bitmap."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + +#ifndef CONFIG_NTFS3_64BIT_CLUSTER + if (inode->i_size >> 32) { + err = -EINVAL; + goto out; + } +#endif + + /* Check bitmap boundary */ + tt = sbi->used.bitmap.nbits; + if (inode->i_size < bitmap_size(tt)) { + err = -EINVAL; + goto out; + } + + /* Not necessary */ + sbi->used.bitmap.set_tail = true; + err = wnd_init(&sbi->used.bitmap, sbi->sb, tt); + if (err) + goto out; + + iput(inode); + + /* Compute the mft zone */ + err = ntfs_refresh_zone(sbi); + if (err) + goto out; + + /* Load $AttrDef */ + ref.low = cpu_to_le32(MFT_REC_ATTR); + ref.seq = cpu_to_le16(MFT_REC_ATTR); + inode = ntfs_iget5(sbi->sb, &ref, &NAME_ATTRDEF); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load $AttrDef -> %d", err); + inode = NULL; + goto out; + } + + if (inode->i_size < sizeof(struct ATTR_DEF_ENTRY)) { + err = -EINVAL; + goto out; + } + bytes = inode->i_size; + sbi->def_table = t = ntfs_malloc(bytes); + if (!t) { + err = -ENOMEM; + goto out; + } + + for (done = idx = 0; done < bytes; done += PAGE_SIZE, idx++) { + unsigned long tail = bytes - done; + struct page *page = ntfs_map_page(inode->i_mapping, idx); + + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + memcpy(Add2Ptr(t, done), page_address(page), + min(PAGE_SIZE, tail)); + ntfs_unmap_page(page); + + if (!idx && ATTR_STD != t->type) { + err = -EINVAL; + goto out; + } + } + + t += 1; + sbi->def_entries = 1; + done = sizeof(struct ATTR_DEF_ENTRY); + sbi->reparse.max_size = MAXIMUM_REPARSE_DATA_BUFFER_SIZE; + sbi->ea_max_size = 0x10000; /* default formater value */ + + while (done + sizeof(struct ATTR_DEF_ENTRY) <= bytes) { + u32 t32 = le32_to_cpu(t->type); + u64 sz = le64_to_cpu(t->max_sz); + + if ((t32 & 0xF) || le32_to_cpu(t[-1].type) >= t32) + break; + + if (t->type == ATTR_REPARSE) + sbi->reparse.max_size = sz; + else if (t->type == ATTR_EA) + sbi->ea_max_size = sz; + + done += sizeof(struct ATTR_DEF_ENTRY); + t += 1; + sbi->def_entries += 1; + } + iput(inode); + + /* Load $UpCase */ + ref.low = cpu_to_le32(MFT_REC_UPCASE); + ref.seq = cpu_to_le16(MFT_REC_UPCASE); + inode = ntfs_iget5(sb, &ref, &NAME_UPCASE); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load \x24LogFile."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + if (inode->i_size != 0x10000 * sizeof(short)) { + err = -EINVAL; + goto out; + } + + sbi->upcase = upcase = ntfs_vmalloc(0x10000 * sizeof(short)); + if (!upcase) { + err = -ENOMEM; + goto out; + } + + for (idx = 0; idx < (0x10000 * sizeof(short) >> PAGE_SHIFT); idx++) { + const __le16 *src; + u16 *dst = Add2Ptr(upcase, idx << PAGE_SHIFT); + struct page *page = ntfs_map_page(inode->i_mapping, idx); + + if (IS_ERR(page)) { + err = PTR_ERR(page); + goto out; + } + + src = page_address(page); + +#ifdef __BIG_ENDIAN + for (i = 0; i < PAGE_SIZE / sizeof(u16); i++) + *dst++ = le16_to_cpu(*src++); +#else + memcpy(dst, src, PAGE_SIZE); +#endif + ntfs_unmap_page(page); + } + + shared = ntfs_set_shared(upcase, 0x10000 * sizeof(short)); + if (shared && upcase != shared) { + sbi->upcase = shared; + ntfs_vfree(upcase); + } + + iput(inode); + inode = NULL; + + if (is_ntfs3(sbi)) { + /* Load $Secure */ + err = ntfs_security_init(sbi); + if (err) + goto out; + + /* Load $Extend */ + err = ntfs_extend_init(sbi); + if (err) + goto load_root; + + /* Load $Extend\$Reparse */ + err = ntfs_reparse_init(sbi); + if (err) + goto load_root; + + /* Load $Extend\$ObjId */ + err = ntfs_objid_init(sbi); + if (err) + goto load_root; + } + +load_root: + /* Load root */ + ref.low = cpu_to_le32(MFT_REC_ROOT); + ref.seq = cpu_to_le16(MFT_REC_ROOT); + inode = ntfs_iget5(sb, &ref, &NAME_ROOT); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + ntfs_err(sb, "Failed to load root."); + inode = NULL; + goto out; + } + + ni = ntfs_i(inode); + + sb->s_root = d_make_root(inode); + + if (!sb->s_root) { + err = -EINVAL; + goto out; + } + + return 0; + +out: + iput(inode); + + if (sb->s_root) { + d_drop(sb->s_root); + sb->s_root = NULL; + } + + put_ntfs(sbi); + + sb->s_fs_info = NULL; + return err; +} + +void ntfs_unmap_meta(struct super_block *sb, CLST lcn, CLST len) +{ + struct ntfs_sb_info *sbi = sb->s_fs_info; + struct block_device *bdev = sb->s_bdev; + sector_t devblock = (u64)lcn * sbi->blocks_per_cluster; + unsigned long blocks = (u64)len * sbi->blocks_per_cluster; + unsigned long cnt = 0; + unsigned long limit = global_zone_page_state(NR_FREE_PAGES) + << (PAGE_SHIFT - sb->s_blocksize_bits); + + if (limit >= 0x2000) + limit -= 0x1000; + else if (limit < 32) + limit = 32; + else + limit >>= 1; + + while (blocks--) { + clean_bdev_aliases(bdev, devblock++, 1); + if (cnt++ >= limit) { + sync_blockdev(bdev); + cnt = 0; + } + } +} + +/* + * ntfs_discard + * + * issue a discard request (trim for SSD) + */ +int ntfs_discard(struct ntfs_sb_info *sbi, CLST lcn, CLST len) +{ + int err; + u64 lbo, bytes, start, end; + struct super_block *sb; + + if (sbi->used.next_free_lcn == lcn + len) + sbi->used.next_free_lcn = lcn; + + if (sbi->flags & NTFS_FLAGS_NODISCARD) + return -EOPNOTSUPP; + + if (!sbi->options.discard) + return -EOPNOTSUPP; + + lbo = (u64)lcn << sbi->cluster_bits; + bytes = (u64)len << sbi->cluster_bits; + + /* Align up 'start' on discard_granularity */ + start = (lbo + sbi->discard_granularity - 1) & + sbi->discard_granularity_mask_inv; + /* Align down 'end' on discard_granularity */ + end = (lbo + bytes) & sbi->discard_granularity_mask_inv; + + sb = sbi->sb; + if (start >= end) + return 0; + + err = blkdev_issue_discard(sb->s_bdev, start >> 9, (end - start) >> 9, + GFP_NOFS, 0); + + if (err == -EOPNOTSUPP) + sbi->flags |= NTFS_FLAGS_NODISCARD; + + return err; +} + +static struct dentry *ntfs_mount(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); +} + +static struct file_system_type ntfs_fs_type = { + .owner = THIS_MODULE, + .name = "ntfs3", + .mount = ntfs_mount, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_ntfs_fs(void) +{ + int err; + + pr_notice("ntfs3: Index binary search\n"); + pr_notice("ntfs3: Hot fix free clusters\n"); + pr_notice("ntfs3: Max link count %u\n", NTFS_LINK_MAX); + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + pr_notice("ntfs3: Enabled Linux POSIX ACLs support\n"); +#endif +#ifdef CONFIG_NTFS3_64BIT_CLUSTER + pr_notice("ntfs3: Activated 64 bits per cluster\n"); +#else + pr_notice("ntfs3: Activated 32 bits per cluster\n"); +#endif +#ifdef CONFIG_NTFS3_LZX_XPRESS + pr_notice("ntfs3: Read-only lzx/xpress compression included\n"); +#endif + + err = ntfs3_init_bitmap(); + if (err) + return err; + + ntfs_inode_cachep = kmem_cache_create( + "ntfs_inode_cache", sizeof(struct ntfs_inode), 0, + (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD | SLAB_ACCOUNT), + init_once); + if (!ntfs_inode_cachep) { + err = -ENOMEM; + goto out1; + } + + err = register_filesystem(&ntfs_fs_type); + if (err) + goto out; + + return 0; +out: + kmem_cache_destroy(ntfs_inode_cachep); +out1: + ntfs3_exit_bitmap(); + return err; +} + +static void __exit exit_ntfs_fs(void) +{ + if (ntfs_inode_cachep) { + rcu_barrier(); + kmem_cache_destroy(ntfs_inode_cachep); + } + + unregister_filesystem(&ntfs_fs_type); + ntfs3_exit_bitmap(); +} + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ntfs3 read/write filesystem"); +MODULE_INFO(behaviour, "Index binary search"); +MODULE_INFO(behaviour, "Hot fix free clusters"); +#ifdef CONFIG_NTFS3_FS_POSIX_ACL +MODULE_INFO(behaviour, "Enabled Linux POSIX ACLs support"); +#endif +#ifdef CONFIG_NTFS3_64BIT_CLUSTER +MODULE_INFO(cluster, "Activated 64 bits per cluster"); +#else +MODULE_INFO(cluster, "Activated 32 bits per cluster"); +#endif +#ifdef CONFIG_NTFS3_LZX_XPRESS +MODULE_INFO(compression, "Read-only lzx/xpress compression included"); +#endif + +MODULE_AUTHOR("Konstantin Komarov"); +MODULE_ALIAS_FS("ntfs3"); + +module_init(init_ntfs_fs); +module_exit(exit_ntfs_fs); diff --git a/fs/ntfs3/upcase.c b/fs/ntfs3/upcase.c new file mode 100644 index 000000000000..9617382aca64 --- /dev/null +++ b/fs/ntfs3/upcase.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +static inline u16 upcase_unicode_char(const u16 *upcase, u16 chr) +{ + if (chr < 'a') + return chr; + + if (chr <= 'z') + return chr - ('a' - 'A'); + + return upcase[chr]; +} + +/* + * Thanks Kari Argillander for idea and implementation 'bothcase' + * + * Straigth way to compare names: + * - case insensitive + * - if name equals and 'bothcases' then + * - case sensitive + * 'Straigth way' code scans input names twice in worst case + * Optimized code scans input names only once + */ +int ntfs_cmp_names(const __le16 *s1, size_t l1, const __le16 *s2, size_t l2, + const u16 *upcase, bool bothcase) +{ + int diff1 = 0; + int diff2; + size_t len = min(l1, l2); + + if (!bothcase && upcase) + goto case_insentive; + + for (; len; s1++, s2++, len--) { + diff1 = le16_to_cpu(*s1) - le16_to_cpu(*s2); + if (diff1) { + if (bothcase && upcase) + goto case_insentive; + + return diff1; + } + } + return l1 - l2; + +case_insentive: + for (; len; s1++, s2++, len--) { + diff2 = upcase_unicode_char(upcase, le16_to_cpu(*s1)) - + upcase_unicode_char(upcase, le16_to_cpu(*s2)); + if (diff2) + return diff2; + } + + diff2 = l1 - l2; + return diff2 ? diff2 : diff1; +} + +int ntfs_cmp_names_cpu(const struct cpu_str *uni1, const struct le_str *uni2, + const u16 *upcase, bool bothcase) +{ + const u16 *s1 = uni1->name; + const __le16 *s2 = uni2->name; + size_t l1 = uni1->len; + size_t l2 = uni2->len; + size_t len = min(l1, l2); + int diff1 = 0; + int diff2; + + if (!bothcase && upcase) + goto case_insentive; + + for (; len; s1++, s2++, len--) { + diff1 = *s1 - le16_to_cpu(*s2); + if (diff1) { + if (bothcase && upcase) + goto case_insentive; + + return diff1; + } + } + return l1 - l2; + +case_insentive: + for (; len; s1++, s2++, len--) { + diff2 = upcase_unicode_char(upcase, *s1) - + upcase_unicode_char(upcase, le16_to_cpu(*s2)); + if (diff2) + return diff2; + } + + diff2 = l1 - l2; + return diff2 ? diff2 : diff1; +} diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c new file mode 100644 index 000000000000..f81329ef79c7 --- /dev/null +++ b/fs/ntfs3/xattr.c @@ -0,0 +1,1037 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * + * Copyright (C) 2019-2021 Paragon Software GmbH, All rights reserved. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "debug.h" +#include "ntfs.h" +#include "ntfs_fs.h" + +// clang-format off +#define SYSTEM_DOS_ATTRIB "system.dos_attrib" +#define SYSTEM_NTFS_ATTRIB "system.ntfs_attrib" +#define SYSTEM_NTFS_SECURITY "system.ntfs_security" +// clang-format on + +static inline size_t unpacked_ea_size(const struct EA_FULL *ea) +{ + return !ea->size ? DwordAlign(offsetof(struct EA_FULL, name) + 1 + + ea->name_len + le16_to_cpu(ea->elength)) + : le32_to_cpu(ea->size); +} + +static inline size_t packed_ea_size(const struct EA_FULL *ea) +{ + return offsetof(struct EA_FULL, name) + 1 - + offsetof(struct EA_FULL, flags) + ea->name_len + + le16_to_cpu(ea->elength); +} + +/* + * find_ea + * + * assume there is at least one xattr in the list + */ +static inline bool find_ea(const struct EA_FULL *ea_all, u32 bytes, + const char *name, u8 name_len, u32 *off) +{ + *off = 0; + + if (!ea_all || !bytes) + return false; + + for (;;) { + const struct EA_FULL *ea = Add2Ptr(ea_all, *off); + u32 next_off = *off + unpacked_ea_size(ea); + + if (next_off > bytes) + return false; + + if (ea->name_len == name_len && + !memcmp(ea->name, name, name_len)) + return true; + + *off = next_off; + if (next_off >= bytes) + return false; + } +} + +/* + * ntfs_read_ea + * + * reads all extended attributes + * ea - new allocated memory + * info - pointer into resident data + */ +static int ntfs_read_ea(struct ntfs_inode *ni, struct EA_FULL **ea, + size_t add_bytes, const struct EA_INFO **info) +{ + int err; + struct ATTR_LIST_ENTRY *le = NULL; + struct ATTRIB *attr_info, *attr_ea; + void *ea_p; + u32 size; + + static_assert(le32_to_cpu(ATTR_EA_INFO) < le32_to_cpu(ATTR_EA)); + + *ea = NULL; + *info = NULL; + + attr_info = + ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, NULL); + attr_ea = + ni_find_attr(ni, attr_info, &le, ATTR_EA, NULL, 0, NULL, NULL); + + if (!attr_ea || !attr_info) + return 0; + + *info = resident_data_ex(attr_info, sizeof(struct EA_INFO)); + if (!*info) + return -EINVAL; + + /* Check Ea limit */ + size = le32_to_cpu((*info)->size); + if (size > ni->mi.sbi->ea_max_size) + return -EFBIG; + + if (attr_size(attr_ea) > ni->mi.sbi->ea_max_size) + return -EFBIG; + + /* Allocate memory for packed Ea */ + ea_p = ntfs_malloc(size + add_bytes); + if (!ea_p) + return -ENOMEM; + + if (attr_ea->non_res) { + struct runs_tree run; + + run_init(&run); + + err = attr_load_runs(attr_ea, ni, &run, NULL); + if (!err) + err = ntfs_read_run_nb(ni->mi.sbi, &run, 0, ea_p, size, + NULL); + run_close(&run); + + if (err) + goto out; + } else { + void *p = resident_data_ex(attr_ea, size); + + if (!p) { + err = -EINVAL; + goto out; + } + memcpy(ea_p, p, size); + } + + memset(Add2Ptr(ea_p, size), 0, add_bytes); + *ea = ea_p; + return 0; + +out: + ntfs_free(ea_p); + *ea = NULL; + return err; +} + +/* + * ntfs_list_ea + * + * copy a list of xattrs names into the buffer + * provided, or compute the buffer size required + */ +static int ntfs_list_ea(struct ntfs_inode *ni, char *buffer, + size_t bytes_per_buffer, size_t *bytes) +{ + const struct EA_INFO *info; + struct EA_FULL *ea_all = NULL; + const struct EA_FULL *ea; + u32 off, size; + int err; + + *bytes = 0; + + err = ntfs_read_ea(ni, &ea_all, 0, &info); + if (err) + return err; + + if (!info || !ea_all) + return 0; + + size = le32_to_cpu(info->size); + + /* Enumerate all xattrs */ + for (off = 0; off < size; off += unpacked_ea_size(ea)) { + ea = Add2Ptr(ea_all, off); + + if (buffer) { + if (*bytes + ea->name_len + 1 > bytes_per_buffer) { + err = -ERANGE; + goto out; + } + + memcpy(buffer + *bytes, ea->name, ea->name_len); + buffer[*bytes + ea->name_len] = 0; + } + + *bytes += ea->name_len + 1; + } + +out: + ntfs_free(ea_all); + return err; +} + +static int ntfs_get_ea(struct inode *inode, const char *name, size_t name_len, + void *buffer, size_t size, size_t *required) +{ + struct ntfs_inode *ni = ntfs_i(inode); + const struct EA_INFO *info; + struct EA_FULL *ea_all = NULL; + const struct EA_FULL *ea; + u32 off, len; + int err; + + if (!(ni->ni_flags & NI_FLAG_EA)) + return -ENODATA; + + if (!required) + ni_lock(ni); + + len = 0; + + if (name_len > 255) { + err = -ENAMETOOLONG; + goto out; + } + + err = ntfs_read_ea(ni, &ea_all, 0, &info); + if (err) + goto out; + + if (!info) + goto out; + + /* Enumerate all xattrs */ + if (!find_ea(ea_all, le32_to_cpu(info->size), name, name_len, &off)) { + err = -ENODATA; + goto out; + } + ea = Add2Ptr(ea_all, off); + + len = le16_to_cpu(ea->elength); + if (!buffer) { + err = 0; + goto out; + } + + if (len > size) { + err = -ERANGE; + if (required) + *required = len; + goto out; + } + + memcpy(buffer, ea->name + ea->name_len + 1, len); + err = 0; + +out: + ntfs_free(ea_all); + if (!required) + ni_unlock(ni); + + return err ? err : len; +} + +static noinline int ntfs_set_ea(struct inode *inode, const char *name, + size_t name_len, const void *value, + size_t val_size, int flags, int locked) +{ + struct ntfs_inode *ni = ntfs_i(inode); + struct ntfs_sb_info *sbi = ni->mi.sbi; + int err; + struct EA_INFO ea_info; + const struct EA_INFO *info; + struct EA_FULL *new_ea; + struct EA_FULL *ea_all = NULL; + size_t add, new_pack; + u32 off, size; + __le16 size_pack; + struct ATTRIB *attr; + struct ATTR_LIST_ENTRY *le; + struct mft_inode *mi; + struct runs_tree ea_run; + u64 new_sz; + void *p; + + if (!locked) + ni_lock(ni); + + run_init(&ea_run); + + if (name_len > 255) { + err = -ENAMETOOLONG; + goto out; + } + + add = DwordAlign(offsetof(struct EA_FULL, name) + 1 + name_len + + val_size); + + err = ntfs_read_ea(ni, &ea_all, add, &info); + if (err) + goto out; + + if (!info) { + memset(&ea_info, 0, sizeof(ea_info)); + size = 0; + size_pack = 0; + } else { + memcpy(&ea_info, info, sizeof(ea_info)); + size = le32_to_cpu(ea_info.size); + size_pack = ea_info.size_pack; + } + + if (info && find_ea(ea_all, size, name, name_len, &off)) { + struct EA_FULL *ea; + size_t ea_sz; + + if (flags & XATTR_CREATE) { + err = -EEXIST; + goto out; + } + + /* Remove current xattr */ + ea = Add2Ptr(ea_all, off); + if (ea->flags & FILE_NEED_EA) + le16_add_cpu(&ea_info.count, -1); + + ea_sz = unpacked_ea_size(ea); + + le16_add_cpu(&ea_info.size_pack, 0 - packed_ea_size(ea)); + + memmove(ea, Add2Ptr(ea, ea_sz), size - off - ea_sz); + + size -= ea_sz; + memset(Add2Ptr(ea_all, size), 0, ea_sz); + + ea_info.size = cpu_to_le32(size); + + if ((flags & XATTR_REPLACE) && !val_size) + goto update_ea; + } else { + if (flags & XATTR_REPLACE) { + err = -ENODATA; + goto out; + } + + if (!ea_all) { + ea_all = ntfs_zalloc(add); + if (!ea_all) { + err = -ENOMEM; + goto out; + } + } + } + + /* append new xattr */ + new_ea = Add2Ptr(ea_all, size); + new_ea->size = cpu_to_le32(add); + new_ea->flags = 0; + new_ea->name_len = name_len; + new_ea->elength = cpu_to_le16(val_size); + memcpy(new_ea->name, name, name_len); + new_ea->name[name_len] = 0; + memcpy(new_ea->name + name_len + 1, value, val_size); + new_pack = le16_to_cpu(ea_info.size_pack) + packed_ea_size(new_ea); + + /* should fit into 16 bits */ + if (new_pack > 0xffff) { + err = -EFBIG; // -EINVAL? + goto out; + } + ea_info.size_pack = cpu_to_le16(new_pack); + + /* new size of ATTR_EA */ + size += add; + if (size > sbi->ea_max_size) { + err = -EFBIG; // -EINVAL? + goto out; + } + ea_info.size = cpu_to_le32(size); + +update_ea: + + if (!info) { + /* Create xattr */ + if (!size) { + err = 0; + goto out; + } + + err = ni_insert_resident(ni, sizeof(struct EA_INFO), + ATTR_EA_INFO, NULL, 0, NULL, NULL); + if (err) + goto out; + + err = ni_insert_resident(ni, 0, ATTR_EA, NULL, 0, NULL, NULL); + if (err) + goto out; + } + + new_sz = size; + err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz, + false, NULL); + if (err) + goto out; + + le = NULL; + attr = ni_find_attr(ni, NULL, &le, ATTR_EA_INFO, NULL, 0, NULL, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + if (!size) { + /* delete xattr, ATTR_EA_INFO */ + err = ni_remove_attr_le(ni, attr, le); + if (err) + goto out; + } else { + p = resident_data_ex(attr, sizeof(struct EA_INFO)); + if (!p) { + err = -EINVAL; + goto out; + } + memcpy(p, &ea_info, sizeof(struct EA_INFO)); + mi->dirty = true; + } + + le = NULL; + attr = ni_find_attr(ni, NULL, &le, ATTR_EA, NULL, 0, NULL, &mi); + if (!attr) { + err = -EINVAL; + goto out; + } + + if (!size) { + /* delete xattr, ATTR_EA */ + err = ni_remove_attr_le(ni, attr, le); + if (err) + goto out; + } else if (attr->non_res) { + err = ntfs_sb_write_run(sbi, &ea_run, 0, ea_all, size); + if (err) + goto out; + } else { + p = resident_data_ex(attr, size); + if (!p) { + err = -EINVAL; + goto out; + } + memcpy(p, ea_all, size); + mi->dirty = true; + } + + if (ea_info.size_pack != size_pack) + ni->ni_flags |= NI_FLAG_UPDATE_PARENT; + mark_inode_dirty(&ni->vfs_inode); + + /* Check if we delete the last xattr */ + if (val_size || flags != XATTR_REPLACE || + ntfs_list_ea(ni, NULL, 0, &val_size) || val_size) { + ni->ni_flags |= NI_FLAG_EA; + } else { + ni->ni_flags &= ~NI_FLAG_EA; + } + +out: + if (!locked) + ni_unlock(ni); + + run_close(&ea_run); + ntfs_free(ea_all); + + return err; +} + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL +static inline void ntfs_posix_acl_release(struct posix_acl *acl) +{ + if (acl && refcount_dec_and_test(&acl->a_refcount)) + kfree(acl); +} + +static struct posix_acl *ntfs_get_acl_ex(struct inode *inode, int type, + int locked) +{ + struct ntfs_inode *ni = ntfs_i(inode); + const char *name; + size_t name_len; + struct posix_acl *acl; + size_t req; + int err; + void *buf; + + /* allocate PATH_MAX bytes */ + buf = __getname(); + if (!buf) + return ERR_PTR(-ENOMEM); + + /* Possible values of 'type' was already checked above */ + if (type == ACL_TYPE_ACCESS) { + name = XATTR_NAME_POSIX_ACL_ACCESS; + name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; + } else { + name = XATTR_NAME_POSIX_ACL_DEFAULT; + name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1; + } + + if (!locked) + ni_lock(ni); + + err = ntfs_get_ea(inode, name, name_len, buf, PATH_MAX, &req); + + if (!locked) + ni_unlock(ni); + + /* Translate extended attribute to acl */ + if (err > 0) { + acl = posix_acl_from_xattr(&init_user_ns, buf, err); + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + } else { + acl = err == -ENODATA ? NULL : ERR_PTR(err); + } + + __putname(buf); + + return acl; +} + +/* + * ntfs_get_acl + * + * inode_operations::get_acl + */ +struct posix_acl *ntfs_get_acl(struct inode *inode, int type) +{ + return ntfs_get_acl_ex(inode, type, 0); +} + +static noinline int ntfs_set_acl_ex(struct inode *inode, struct posix_acl *acl, + int type, int locked) +{ + const char *name; + size_t size, name_len; + void *value = NULL; + int err = 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch (type) { + case ACL_TYPE_ACCESS: + if (acl) { + umode_t mode = inode->i_mode; + + err = posix_acl_equiv_mode(acl, &mode); + if (err < 0) + return err; + + if (inode->i_mode != mode) { + inode->i_mode = mode; + mark_inode_dirty(inode); + } + + if (!err) { + /* + * acl can be exactly represented in the + * traditional file mode permission bits + */ + acl = NULL; + goto out; + } + } + name = XATTR_NAME_POSIX_ACL_ACCESS; + name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; + break; + + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + name = XATTR_NAME_POSIX_ACL_DEFAULT; + name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1; + break; + + default: + return -EINVAL; + } + + if (!acl) + goto out; + + size = posix_acl_xattr_size(acl->a_count); + value = ntfs_malloc(size); + if (!value) + return -ENOMEM; + + err = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (err) + goto out; + + err = ntfs_set_ea(inode, name, name_len, value, size, 0, locked); + if (err) + goto out; + + inode->i_flags &= ~S_NOSEC; + +out: + if (!err) + set_cached_acl(inode, type, acl); + + kfree(value); + + return err; +} + +/* + * ntfs_set_acl + * + * inode_operations::set_acl + */ +int ntfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + return ntfs_set_acl_ex(inode, acl, type, 0); +} + +static int ntfs_xattr_get_acl(struct inode *inode, int type, void *buffer, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) + return -EOPNOTSUPP; + + acl = ntfs_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (!acl) + return -ENODATA; + + err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size); + ntfs_posix_acl_release(acl); + + return err; +} + +static int ntfs_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + struct posix_acl *acl; + int err; + + if (!(inode->i_sb->s_flags & SB_POSIXACL)) + return -EOPNOTSUPP; + + if (!inode_owner_or_capable(inode)) + return -EPERM; + + if (!value) + return 0; + + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + err = posix_acl_valid(&init_user_ns, acl); + if (err) + goto release_and_out; + } + + err = ntfs_set_acl(inode, acl, type); + +release_and_out: + ntfs_posix_acl_release(acl); + return err; +} + +/* + * Initialize the ACLs of a new inode. Called from ntfs_create_inode. + */ +int ntfs_init_acl(struct inode *inode, struct inode *dir) +{ + struct posix_acl *default_acl, *acl; + int err; + + /* + * TODO refactoring lock + * ni_lock(dir) ... -> posix_acl_create(dir,...) -> ntfs_get_acl -> ni_lock(dir) + */ + inode->i_default_acl = NULL; + + default_acl = ntfs_get_acl_ex(dir, ACL_TYPE_DEFAULT, 1); + + if (!default_acl || default_acl == ERR_PTR(-EOPNOTSUPP)) { + inode->i_mode &= ~current_umask(); + err = 0; + goto out; + } + + if (IS_ERR(default_acl)) { + err = PTR_ERR(default_acl); + goto out; + } + + acl = default_acl; + err = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (err < 0) + goto out1; + if (!err) { + posix_acl_release(acl); + acl = NULL; + } + + if (!S_ISDIR(inode->i_mode)) { + posix_acl_release(default_acl); + default_acl = NULL; + } + + if (default_acl) + err = ntfs_set_acl_ex(inode, default_acl, ACL_TYPE_DEFAULT, 1); + + if (!acl) + inode->i_acl = NULL; + else if (!err) + err = ntfs_set_acl_ex(inode, acl, ACL_TYPE_ACCESS, 1); + + posix_acl_release(acl); +out1: + posix_acl_release(default_acl); + +out: + return err; +} +#endif + +/* + * ntfs_acl_chmod + * + * helper for 'ntfs3_setattr' + */ +int ntfs_acl_chmod(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + + if (!(sb->s_flags & SB_POSIXACL)) + return 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + return posix_acl_chmod(inode, inode->i_mode); +} + +/* + * ntfs_permission + * + * inode_operations::permission + */ +int ntfs_permission(struct inode *inode, int mask) +{ + if (ntfs_sb(inode->i_sb)->options.no_acs_rules) { + /* "no access rules" mode - allow all changes */ + return 0; + } + + return generic_permission(inode, mask); +} + +/* + * ntfs_listxattr + * + * inode_operations::listxattr + */ +ssize_t ntfs_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = d_inode(dentry); + struct ntfs_inode *ni = ntfs_i(inode); + ssize_t ret = -1; + int err; + + if (!(ni->ni_flags & NI_FLAG_EA)) { + ret = 0; + goto out; + } + + ni_lock(ni); + + err = ntfs_list_ea(ni, buffer, size, (size_t *)&ret); + + ni_unlock(ni); + + if (err) + ret = err; +out: + return ret; +} + +static int ntfs_getxattr(const struct xattr_handler *handler, struct dentry *de, + struct inode *inode, const char *name, void *buffer, + size_t size) +{ + int err; + struct ntfs_inode *ni = ntfs_i(inode); + size_t name_len = strlen(name); + + /* Dispatch request */ + if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 && + !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) { + /* system.dos_attrib */ + if (!buffer) { + err = sizeof(u8); + } else if (size < sizeof(u8)) { + err = -ENODATA; + } else { + err = sizeof(u8); + *(u8 *)buffer = le32_to_cpu(ni->std_fa); + } + goto out; + } + + if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 && + !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) { + /* system.ntfs_attrib */ + if (!buffer) { + err = sizeof(u32); + } else if (size < sizeof(u32)) { + err = -ENODATA; + } else { + err = sizeof(u32); + *(u32 *)buffer = le32_to_cpu(ni->std_fa); + } + goto out; + } + + if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 && + !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) { + /* system.ntfs_security*/ + struct SECURITY_DESCRIPTOR_RELATIVE *sd = NULL; + size_t sd_size = 0; + + if (!is_ntfs3(ni->mi.sbi)) { + /* we should get nt4 security */ + err = -EINVAL; + goto out; + } else if (le32_to_cpu(ni->std_security_id) < + SECURITY_ID_FIRST) { + err = -ENOENT; + goto out; + } + + err = ntfs_get_security_by_id(ni->mi.sbi, ni->std_security_id, + &sd, &sd_size); + if (err) + goto out; + + if (!is_sd_valid(sd, sd_size)) { + ntfs_inode_warn( + inode, + "looks like you get incorrect security descriptor id=%u", + ni->std_security_id); + } + + if (!buffer) { + err = sd_size; + } else if (size < sd_size) { + err = -ENODATA; + } else { + err = sd_size; + memcpy(buffer, sd, sd_size); + } + ntfs_free(sd); + goto out; + } + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + err = ntfs_xattr_get_acl( + inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + buffer, size); + goto out; + } +#endif + /* deal with ntfs extended attribute */ + err = ntfs_get_ea(inode, name, name_len, buffer, size, NULL); + +out: + return err; +} + +/* + * ntfs_setxattr + * + * inode_operations::setxattr + */ +static noinline int ntfs_setxattr(const struct xattr_handler *handler, + struct dentry *de, struct inode *inode, + const char *name, const void *value, + size_t size, int flags) +{ + int err = -EINVAL; + struct ntfs_inode *ni = ntfs_i(inode); + size_t name_len = strlen(name); + enum FILE_ATTRIBUTE new_fa; + + /* Dispatch request */ + if (name_len == sizeof(SYSTEM_DOS_ATTRIB) - 1 && + !memcmp(name, SYSTEM_DOS_ATTRIB, sizeof(SYSTEM_DOS_ATTRIB))) { + if (sizeof(u8) != size) + goto out; + new_fa = cpu_to_le32(*(u8 *)value); + goto set_new_fa; + } + + if (name_len == sizeof(SYSTEM_NTFS_ATTRIB) - 1 && + !memcmp(name, SYSTEM_NTFS_ATTRIB, sizeof(SYSTEM_NTFS_ATTRIB))) { + if (size != sizeof(u32)) + goto out; + new_fa = cpu_to_le32(*(u32 *)value); + + if (S_ISREG(inode->i_mode)) { + /* Process compressed/sparsed in special way*/ + ni_lock(ni); + err = ni_new_attr_flags(ni, new_fa); + ni_unlock(ni); + if (err) + goto out; + } +set_new_fa: + /* + * Thanks Mark Harmstone: + * keep directory bit consistency + */ + if (S_ISDIR(inode->i_mode)) + new_fa |= FILE_ATTRIBUTE_DIRECTORY; + else + new_fa &= ~FILE_ATTRIBUTE_DIRECTORY; + + if (ni->std_fa != new_fa) { + ni->std_fa = new_fa; + if (new_fa & FILE_ATTRIBUTE_READONLY) + inode->i_mode &= ~0222; + else + inode->i_mode |= 0222; + /* std attribute always in primary record */ + ni->mi.dirty = true; + mark_inode_dirty(inode); + } + err = 0; + + goto out; + } + + if (name_len == sizeof(SYSTEM_NTFS_SECURITY) - 1 && + !memcmp(name, SYSTEM_NTFS_SECURITY, sizeof(SYSTEM_NTFS_SECURITY))) { + /* system.ntfs_security*/ + __le32 security_id; + bool inserted; + struct ATTR_STD_INFO5 *std; + + if (!is_ntfs3(ni->mi.sbi)) { + /* + * we should replace ATTR_SECURE + * Skip this way cause it is nt4 feature + */ + err = -EINVAL; + goto out; + } + + if (!is_sd_valid(value, size)) { + err = -EINVAL; + ntfs_inode_warn( + inode, + "you try to set invalid security descriptor"); + goto out; + } + + err = ntfs_insert_security(ni->mi.sbi, value, size, + &security_id, &inserted); + if (err) + goto out; + + ni_lock(ni); + std = ni_std5(ni); + if (!std) { + err = -EINVAL; + } else if (std->security_id != security_id) { + std->security_id = ni->std_security_id = security_id; + /* std attribute always in primary record */ + ni->mi.dirty = true; + mark_inode_dirty(&ni->vfs_inode); + } + ni_unlock(ni); + goto out; + } + +#ifdef CONFIG_NTFS3_FS_POSIX_ACL + if ((name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_ACCESS, + sizeof(XATTR_NAME_POSIX_ACL_ACCESS))) || + (name_len == sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1 && + !memcmp(name, XATTR_NAME_POSIX_ACL_DEFAULT, + sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)))) { + err = ntfs_xattr_set_acl( + inode, + name_len == sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1 + ? ACL_TYPE_ACCESS + : ACL_TYPE_DEFAULT, + value, size); + goto out; + } +#endif + /* deal with ntfs extended attribute */ + err = ntfs_set_ea(inode, name, name_len, value, size, flags, 0); + +out: + return err; +} + +static bool ntfs_xattr_user_list(struct dentry *dentry) +{ + return 1; +} + +static const struct xattr_handler ntfs_xattr_handler = { + .prefix = "", + .get = ntfs_getxattr, + .set = ntfs_setxattr, + .list = ntfs_xattr_user_list, +}; + +const struct xattr_handler *ntfs_xattr_handlers[] = { + &ntfs_xattr_handler, + NULL, +}; diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 0179a73a3fa2..12a7590601dd 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2042,7 +2042,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g o2hb_nego_timeout_handler, reg, NULL, ®->hr_handler_list); if (ret) - goto free; + goto remove_item; ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key, sizeof(struct o2hb_nego_msg), @@ -2057,6 +2057,12 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g unregister_handler: o2net_unregister_handler_list(®->hr_handler_list); +remove_item: + spin_lock(&o2hb_live_lock); + list_del(®->hr_all_item); + if (o2hb_global_heartbeat_active()) + clear_bit(reg->hr_region_num, o2hb_region_bitmap); + spin_unlock(&o2hb_live_lock); free: kfree(reg); return ERR_PTR(ret); diff --git a/fs/pnode.h b/fs/pnode.h index 26f74e092bd9..988f1aa9b02a 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -12,7 +12,7 @@ #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED) #define IS_MNT_SLAVE(m) ((m)->mnt_master) -#define IS_MNT_NEW(m) (!(m)->mnt_ns) +#define IS_MNT_NEW(m) (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns)) #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED) #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE) #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED) diff --git a/fs/proc/base.c b/fs/proc/base.c index b3422cda2a91..ddecff0c1943 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -96,6 +96,8 @@ #include #include #include +#include +#include #include #include "internal.h" #include "fd.h" @@ -3150,6 +3152,144 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, } #endif /* CONFIG_STACKLEAK_METRICS */ +#ifdef CONFIG_KSM +static int ksm_open(struct inode *inode, struct file *file) +{ + struct task_struct *task; + struct mm_struct *mm; + int err; + + task = get_proc_task(inode); + if (!task) { + err = -ESRCH; + goto out; + } + if (task->flags & PF_KTHREAD) { + put_task_struct(task); + err = -EINVAL; + goto out; + } + + mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + put_task_struct(task); + if (!mm) { + err = -EINVAL; + goto out; + } + if (IS_ERR(mm)) { + err = PTR_ERR(mm); + goto out; + } + + /* ensure this mm_struct can't be freed */ + mmgrab(mm); + /* but do not pin its memory */ + mmput(mm); + + err = 0; + file->private_data = mm; + +out: + return err; +} + +static ssize_t ksm_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + char kbuf[PROC_NUMBUF]; + char *pos; + int behaviour; + struct mm_struct *mm = file->private_data; + int err; + int last_err; + struct vm_area_struct *vma; + + if (!mm) { + err = -EINVAL; + goto out; + } + + /* Only allow a very narrow range of strings to be written */ + if ((*ppos != 0) || (count >= sizeof(kbuf))) { + err = -EINVAL; + goto out; + } + + /* What was written? */ + if (copy_from_user(kbuf, buf, count)) { + err = -EFAULT; + goto out; + } + kbuf[count] = '\0'; + pos = kbuf; + + /* What is being requested? */ + if (strncmp(pos, "merge", 5) == 0) { + pos += 5; + behaviour = MADV_MERGEABLE; + } + else if (strncmp(pos, "unmerge", 7) == 0) { + pos += 7; + behaviour = MADV_UNMERGEABLE; + } + else { + err = -EINVAL; + goto out; + } + + /* Verify there is not trailing junk on the line */ + pos = skip_spaces(pos); + if (*pos != '\0') { + err = -EINVAL; + goto out; + } + + if (!mmget_not_zero(mm)) { + err = -EINVAL; + goto out; + } + + mmap_write_lock(mm); + + err = 0; + + vma = mm->mmap; + while (vma) { + if (behaviour == MADV_MERGEABLE) + last_err = ksm_madvise_merge(vma->vm_mm, vma, &vma->vm_flags); + else + last_err = ksm_madvise_unmerge(vma, vma->vm_start, vma->vm_end, &vma->vm_flags); + if (last_err) + err = last_err; + vma = vma->vm_next; + } + + mmap_write_unlock(mm); + + mmput(mm); + +out: + return err ? err : count; +} + +static int ksm_release(struct inode *inode, struct file *file) +{ + struct mm_struct *mm = file->private_data; + + if (mm) + mmdrop(mm); + + return 0; +} + +static const struct file_operations proc_ksm_operations = { + .open = ksm_open, + .write = ksm_write, + .llseek = noop_llseek, + .release = ksm_release, +}; +#endif /* CONFIG_KSM */ + /* * Thread groups */ @@ -3266,6 +3406,9 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif +#ifdef CONFIG_KSM + REG("ksm", S_IRUGO|S_IWUSR, proc_ksm_operations), +#endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index d2018f70d1fa..070d2df8ab9c 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -571,7 +571,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, error = -ENOMEM; if (count >= KMALLOC_MAX_SIZE) goto out; - kbuf = kzalloc(count + 1, GFP_KERNEL); + kbuf = kvzalloc(count + 1, GFP_KERNEL); if (!kbuf) goto out; @@ -600,7 +600,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter, error = count; out_free_buf: - kfree(kbuf); + kvfree(kbuf); out: sysctl_head_finish(head); diff --git a/fs/proc/self.c b/fs/proc/self.c index cc71ce3466dc..a4012154e109 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -20,7 +20,7 @@ static const char *proc_self_get_link(struct dentry *dentry, * Not currently supported. Once we can inherit all of struct pid, * we can allow this. */ - if (current->flags & PF_KTHREAD) + if (current->flags & PF_IO_WORKER) return ERR_PTR(-EOPNOTSUPP); if (!tgid) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 602e3a52884d..3cec6fbef725 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1210,7 +1210,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, struct mm_struct *mm; struct vm_area_struct *vma; enum clear_refs_types type; - struct mmu_gather tlb; int itype; int rv; @@ -1249,7 +1248,6 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, goto out_unlock; } - tlb_gather_mmu(&tlb, mm, 0, -1); if (type == CLEAR_REFS_SOFT_DIRTY) { for (vma = mm->mmap; vma; vma = vma->vm_next) { if (!(vma->vm_flags & VM_SOFTDIRTY)) @@ -1258,15 +1256,18 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, vma_set_page_prot(vma); } + inc_tlb_flush_pending(mm); mmu_notifier_range_init(&range, MMU_NOTIFY_SOFT_DIRTY, 0, NULL, mm, 0, -1UL); mmu_notifier_invalidate_range_start(&range); } walk_page_range(mm, 0, mm->highest_vm_end, &clear_refs_walk_ops, &cp); - if (type == CLEAR_REFS_SOFT_DIRTY) + if (type == CLEAR_REFS_SOFT_DIRTY) { mmu_notifier_invalidate_range_end(&range); - tlb_finish_mmu(&tlb, 0, -1); + flush_tlb_mm(mm); + dec_tlb_flush_pending(mm); + } out_unlock: mmap_write_unlock(mm); out_mm: diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index a553273fbd41..d56681d86d28 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -17,6 +17,13 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, pid_t pid = task_pid_nr_ns(current, ns); char *name; + /* + * Not currently supported. Once we can inherit all of struct pid, + * we can allow this. + */ + if (current->flags & PF_IO_WORKER) + return ERR_PTR(-EOPNOTSUPP); + if (!pid) return ERR_PTR(-ENOENT); name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 93a217e4f563..14658b009f1b 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -467,7 +467,7 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type, static void pstore_kill_sb(struct super_block *sb) { mutex_lock(&pstore_sb_lock); - WARN_ON(pstore_sb != sb); + WARN_ON(pstore_sb && pstore_sb != sb); kill_litter_super(sb); pstore_sb = NULL; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 32f64abc277c..67b194ba1b03 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -218,7 +218,7 @@ static int zbufsize_842(size_t size) #if IS_ENABLED(CONFIG_PSTORE_ZSTD_COMPRESS) static int zbufsize_zstd(size_t size) { - return ZSTD_compressBound(size); + return zstd_compress_bound(size); } #endif @@ -269,7 +269,7 @@ static int pstore_compress(const void *in, void *out, { int ret; - if (!IS_ENABLED(CONFIG_PSTORE_COMPRESSION)) + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS)) return -EINVAL; ret = crypto_comp_compress(tfm, in, inlen, out, &outlen); @@ -671,7 +671,7 @@ static void decompress_record(struct pstore_record *record) int unzipped_len; char *unzipped, *workspace; - if (!IS_ENABLED(CONFIG_PSTORE_COMPRESSION) || !record->compressed) + if (!IS_ENABLED(CONFIG_PSTORE_COMPRESS) || !record->compressed) return; /* Only PSTORE_TYPE_DMESG support compression. */ diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index c21106557a37..b1467f3921c2 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -164,19 +164,24 @@ static int v2_read_file_info(struct super_block *sb, int type) quota_error(sb, "Number of blocks too big for quota file size (%llu > %llu).", (loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits, i_size_read(sb_dqopt(sb)->files[type])); - goto out; + goto out_free; } if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) { quota_error(sb, "Free block number too big (%u >= %u).", qinfo->dqi_free_blk, qinfo->dqi_blocks); - goto out; + goto out_free; } if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) { quota_error(sb, "Block with free entry too big (%u >= %u).", qinfo->dqi_free_entry, qinfo->dqi_blocks); - goto out; + goto out_free; } ret = 0; +out_free: + if (ret) { + kfree(info->dqi_priv); + info->dqi_priv = NULL; + } out: up_read(&dqopt->dqio_sem); return ret; diff --git a/fs/select.c b/fs/select.c index 37aaa8317f3a..945896d0ac9e 100644 --- a/fs/select.c +++ b/fs/select.c @@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block) ret = do_sys_poll(ufds, nfds, to); - if (ret == -ERESTARTNOHAND) { - restart_block->fn = do_restart_poll; - ret = -ERESTART_RESTARTBLOCK; - } + if (ret == -ERESTARTNOHAND) + ret = set_restart_fn(restart_block, do_restart_poll); + return ret; } @@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, struct restart_block *restart_block; restart_block = ¤t->restart_block; - restart_block->fn = do_restart_poll; restart_block->poll.ufds = ufds; restart_block->poll.nfds = nfds; @@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds, } else restart_block->poll.has_timeout = 0; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart_block, do_restart_poll); } return ret; } diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index eb02072d28dd..723763746238 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -152,14 +152,18 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end + || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= lookup_table_start || + (lookup_table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index 11581bf31af4..ea5387679723 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -97,14 +97,16 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= id_table_start || (id_table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 8d64edb80ebf..b3fdc8212c5f 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -17,6 +17,7 @@ /* size of metadata (inode and directory) blocks */ #define SQUASHFS_METADATA_SIZE 8192 +#define SQUASHFS_BLOCK_OFFSET 2 /* default size of block device I/O */ #ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index ead66670b41a..087cab8c78f4 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -109,14 +109,16 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= table_start || (table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index b7cb1faa652d..6967c0aae801 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -34,7 +34,7 @@ static void *zstd_init(struct squashfs_sb_info *msblk, void *buff) goto failed; wksp->window_size = max_t(size_t, msblk->block_size, SQUASHFS_METADATA_SIZE); - wksp->mem_size = ZSTD_DStreamWorkspaceBound(wksp->window_size); + wksp->mem_size = zstd_dstream_workspace_bound(wksp->window_size); wksp->mem = vmalloc(wksp->mem_size); if (wksp->mem == NULL) goto failed; @@ -63,15 +63,15 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, struct squashfs_page_actor *output) { struct workspace *wksp = strm; - ZSTD_DStream *stream; + zstd_dstream *stream; size_t total_out = 0; int error = 0; - ZSTD_inBuffer in_buf = { NULL, 0, 0 }; - ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + zstd_in_buffer in_buf = { NULL, 0, 0 }; + zstd_out_buffer out_buf = { NULL, 0, 0 }; struct bvec_iter_all iter_all = {}; struct bio_vec *bvec = bvec_init_iter_all(&iter_all); - stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size); + stream = zstd_init_dstream(wksp->window_size, wksp->mem, wksp->mem_size); if (!stream) { ERROR("Failed to initialize zstd decompressor\n"); @@ -116,14 +116,14 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, } total_out -= out_buf.pos; - zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf); + zstd_err = zstd_decompress_stream(stream, &out_buf, &in_buf); total_out += out_buf.pos; /* add the additional data produced */ if (zstd_err == 0) break; - if (ZSTD_isError(zstd_err)) { + if (zstd_is_error(zstd_err)) { ERROR("zstd decompression error: %d\n", - (int)ZSTD_getErrorCode(zstd_err)); + (int)zstd_get_error_code(zstd_err)); error = -EIO; break; } diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 51a7c8c2c3f0..e564d5ff8781 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -327,7 +327,7 @@ int ubifs_init_authentication(struct ubifs_info *c) ubifs_err(c, "hmac %s is bigger than maximum allowed hmac size (%d > %d)", hmac_name, c->hmac_desc_len, UBIFS_HMAC_ARR_SZ); err = -EINVAL; - goto out_free_hash; + goto out_free_hmac; } err = crypto_shash_setkey(c->hmac_tfm, ukp->data, ukp->datalen); diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 79801c9a5b87..0f8a6a16421b 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -559,7 +559,9 @@ static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) } /* authenticate_sleb_hash is split out for stack usage */ -static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_hash, u8 *hash) +static int noinline_for_stack +authenticate_sleb_hash(struct ubifs_info *c, + struct shash_desc *log_hash, u8 *hash) { SHASH_DESC_ON_STACK(hash_desc, c->hash_tfm); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 138b9426c6c1..ddb2ca636c93 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -838,8 +838,10 @@ static int alloc_wbufs(struct ubifs_info *c) c->jheads[i].wbuf.jhead = i; c->jheads[i].grouped = 1; c->jheads[i].log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->jheads[i].log_hash)) + if (IS_ERR(c->jheads[i].log_hash)) { + err = PTR_ERR(c->jheads[i].log_hash); goto out; + } } /* diff --git a/fs/udf/inode.c b/fs/udf/inode.c index bb89c3e43212..0dd2f93ac048 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -544,11 +544,14 @@ static int udf_do_extend_file(struct inode *inode, udf_write_aext(inode, last_pos, &last_ext->extLocation, last_ext->extLength, 1); + /* - * We've rewritten the last extent but there may be empty - * indirect extent after it - enter it. + * We've rewritten the last extent. If we are going to add + * more extents, we may need to enter possible following + * empty indirect extent. */ - udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); + if (new_block_bytes || prealloc_len) + udf_next_aext(inode, last_pos, &tmploc, &tmplen, 0); } /* Managed to do everything necessary? */ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 67c8dc9de8aa..f1e21b6cfa48 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -846,7 +846,7 @@ xfs_setattr_size( ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL)); ASSERT(S_ISREG(inode->i_mode)); ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| - ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); + ATTR_MTIME_SET|ATTR_TIMES_SET)) == 0); oldsize = inode->i_size; newsize = iattr->ia_size; diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index bec47f2d074b..2243dc1fb48f 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -159,6 +159,21 @@ static int zonefs_writepages(struct address_space *mapping, return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops); } +static int zonefs_swap_activate(struct swap_info_struct *sis, + struct file *swap_file, sector_t *span) +{ + struct inode *inode = file_inode(swap_file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + + if (zi->i_ztype != ZONEFS_ZTYPE_CNV) { + zonefs_err(inode->i_sb, + "swap file: not a conventional zone file\n"); + return -EINVAL; + } + + return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops); +} + static const struct address_space_operations zonefs_file_aops = { .readpage = zonefs_readpage, .readahead = zonefs_readahead, @@ -171,6 +186,7 @@ static const struct address_space_operations zonefs_file_aops = { .is_partially_uptodate = iomap_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .direct_IO = noop_direct_IO, + .swap_activate = zonefs_swap_activate, }; static void zonefs_update_stats(struct inode *inode, loff_t new_isize) @@ -250,6 +266,9 @@ static loff_t zonefs_check_zone_condition(struct inode *inode, } inode->i_mode &= ~0222; return i_size_read(inode); + case BLK_ZONE_COND_FULL: + /* The write pointer of full zones is invalid. */ + return zi->i_max_size; default: if (zi->i_ztype == ZONEFS_ZTYPE_CNV) return zi->i_max_size; @@ -716,6 +735,68 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from) return ret; } +/* + * Do not exceed the LFS limits nor the file zone size. If pos is under the + * limit it becomes a short access. If it exceeds the limit, return -EFBIG. + */ +static loff_t zonefs_write_check_limits(struct file *file, loff_t pos, + loff_t count) +{ + struct inode *inode = file_inode(file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t limit = rlimit(RLIMIT_FSIZE); + loff_t max_size = zi->i_max_size; + + if (limit != RLIM_INFINITY) { + if (pos >= limit) { + send_sig(SIGXFSZ, current, 0); + return -EFBIG; + } + count = min(count, limit - pos); + } + + if (!(file->f_flags & O_LARGEFILE)) + max_size = min_t(loff_t, MAX_NON_LFS, max_size); + + if (unlikely(pos >= max_size)) + return -EFBIG; + + return min(count, max_size - pos); +} + +static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct zonefs_inode_info *zi = ZONEFS_I(inode); + loff_t count; + + if (IS_SWAPFILE(inode)) + return -ETXTBSY; + + if (!iov_iter_count(from)) + return 0; + + if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) + return -EINVAL; + + if (iocb->ki_flags & IOCB_APPEND) { + if (zi->i_ztype != ZONEFS_ZTYPE_SEQ) + return -EINVAL; + mutex_lock(&zi->i_truncate_mutex); + iocb->ki_pos = zi->i_wpoffset; + mutex_unlock(&zi->i_truncate_mutex); + } + + count = zonefs_write_check_limits(file, iocb->ki_pos, + iov_iter_count(from)); + if (count < 0) + return count; + + iov_iter_truncate(from, count); + return iov_iter_count(from); +} + /* * Handle direct writes. For sequential zone files, this is the only possible * write path. For these files, check that the user is issuing writes @@ -733,8 +814,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) struct super_block *sb = inode->i_sb; bool sync = is_sync_kiocb(iocb); bool append = false; - size_t count; - ssize_t ret; + ssize_t ret, count; /* * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT @@ -752,12 +832,11 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from) inode_lock(inode); } - ret = generic_write_checks(iocb, from); - if (ret <= 0) + count = zonefs_write_checks(iocb, from); + if (count <= 0) { + ret = count; goto inode_unlock; - - iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); - count = iov_iter_count(from); + } if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) { ret = -EINVAL; @@ -817,12 +896,10 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb, inode_lock(inode); } - ret = generic_write_checks(iocb, from); + ret = zonefs_write_checks(iocb, from); if (ret <= 0) goto inode_unlock; - iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos); - ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops); if (ret > 0) iocb->ki_pos += ret; @@ -955,9 +1032,7 @@ static int zonefs_open_zone(struct inode *inode) mutex_lock(&zi->i_truncate_mutex); - zi->i_wr_refcnt++; - if (zi->i_wr_refcnt == 1) { - + if (!zi->i_wr_refcnt) { if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) { atomic_dec(&sbi->s_open_zones); ret = -EBUSY; @@ -967,7 +1042,6 @@ static int zonefs_open_zone(struct inode *inode) if (i_size_read(inode) < zi->i_max_size) { ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN); if (ret) { - zi->i_wr_refcnt--; atomic_dec(&sbi->s_open_zones); goto unlock; } @@ -975,6 +1049,8 @@ static int zonefs_open_zone(struct inode *inode) } } + zi->i_wr_refcnt++; + unlock: mutex_unlock(&zi->i_truncate_mutex); diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h index 2fc624a61769..f8a4afb0279a 100644 --- a/include/acpi/acexcep.h +++ b/include/acpi/acexcep.h @@ -59,11 +59,11 @@ struct acpi_exception_info { #define AE_OK (acpi_status) 0x0000 -#define ACPI_ENV_EXCEPTION(status) (status & AE_CODE_ENVIRONMENTAL) -#define ACPI_AML_EXCEPTION(status) (status & AE_CODE_AML) -#define ACPI_PROG_EXCEPTION(status) (status & AE_CODE_PROGRAMMER) -#define ACPI_TABLE_EXCEPTION(status) (status & AE_CODE_ACPI_TABLES) -#define ACPI_CNTL_EXCEPTION(status) (status & AE_CODE_CONTROL) +#define ACPI_ENV_EXCEPTION(status) (((status) & AE_CODE_MASK) == AE_CODE_ENVIRONMENTAL) +#define ACPI_AML_EXCEPTION(status) (((status) & AE_CODE_MASK) == AE_CODE_AML) +#define ACPI_PROG_EXCEPTION(status) (((status) & AE_CODE_MASK) == AE_CODE_PROGRAMMER) +#define ACPI_TABLE_EXCEPTION(status) (((status) & AE_CODE_MASK) == AE_CODE_ACPI_TABLES) +#define ACPI_CNTL_EXCEPTION(status) (((status) & AE_CODE_MASK) == AE_CODE_CONTROL) /* * Environmental exceptions diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 6d1879bf9440..37dac195adbb 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -233,6 +233,7 @@ struct acpi_pnp_type { struct acpi_device_pnp { acpi_bus_id bus_id; /* Object name */ + int instance_no; /* Instance number of this object */ struct acpi_pnp_type type; /* ID type */ acpi_bus_address bus_address; /* _ADR */ char *unique_id; /* _UID */ diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b97c628ad91f..d7efbc5490e8 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -393,7 +393,10 @@ . = ALIGN(8); \ __start_static_call_sites = .; \ KEEP(*(.static_call_sites)) \ - __stop_static_call_sites = .; + __stop_static_call_sites = .; \ + __start_static_call_tramp_key = .; \ + KEEP(*(.static_call_tramp_key)) \ + __stop_static_call_tramp_key = .; /* * Allow architectures to handle ro_after_init data on their @@ -828,8 +831,13 @@ /* DWARF 4 */ \ .debug_types 0 : { *(.debug_types) } \ /* DWARF 5 */ \ + .debug_addr 0 : { *(.debug_addr) } \ + .debug_line_str 0 : { *(.debug_line_str) } \ + .debug_loclists 0 : { *(.debug_loclists) } \ .debug_macro 0 : { *(.debug_macro) } \ - .debug_addr 0 : { *(.debug_addr) } + .debug_names 0 : { *(.debug_names) } \ + .debug_rnglists 0 : { *(.debug_rnglists) } \ + .debug_str_offsets 0 : { *(.debug_str_offsets) } /* Stabs debugging sections. */ #define STABS_DEBUG \ @@ -988,12 +996,13 @@ #endif /* - * Clang's -fsanitize=kernel-address and -fsanitize=thread produce - * unwanted sections (.eh_frame and .init_array.*), but - * CONFIG_CONSTRUCTORS wants to keep any .init_array.* sections. + * Clang's -fprofile-arcs, -fsanitize=kernel-address, and + * -fsanitize=thread produce unwanted sections (.eh_frame + * and .init_array.*), but CONFIG_CONSTRUCTORS wants to + * keep any .init_array.* sections. * https://bugs.llvm.org/show_bug.cgi?id=46478 */ -#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) +#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN) # ifdef CONFIG_CONSTRUCTORS # define SANITIZER_DISCARDS \ *(.eh_frame) diff --git a/include/crypto/hash.h b/include/crypto/hash.h index af2ff31ff619..13f8a6a54ca8 100644 --- a/include/crypto/hash.h +++ b/include/crypto/hash.h @@ -149,7 +149,7 @@ struct ahash_alg { struct shash_desc { struct crypto_shash *tfm; - void *__ctx[] CRYPTO_MINALIGN_ATTR; + void *__ctx[] __aligned(ARCH_SLAB_MINALIGN); }; #define HASH_MAX_DIGESTSIZE 64 @@ -162,9 +162,9 @@ struct shash_desc { #define HASH_MAX_STATESIZE 512 -#define SHASH_DESC_ON_STACK(shash, ctx) \ - char __##shash##_desc[sizeof(struct shash_desc) + \ - HASH_MAX_DESCSIZE] CRYPTO_MINALIGN_ATTR; \ +#define SHASH_DESC_ON_STACK(shash, ctx) \ + char __##shash##_desc[sizeof(struct shash_desc) + HASH_MAX_DESCSIZE] \ + __aligned(__alignof__(struct shash_desc)); \ struct shash_desc *shash = (struct shash_desc *)__##shash##_desc /** diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 053bf05fb1f7..b20568c44001 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1072,19 +1072,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB) bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio); -int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); +int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index); #else static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares, struct acpi_resource_gpio **agpio) { return false; } -static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, + const char *name, int index) { return -ENXIO; } #endif +static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) +{ + return acpi_dev_gpio_irq_get_by(adev, NULL, index); +} + /* Device properties */ #ifdef CONFIG_ACPI diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d705b174d346..b4b9604bbfd7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -140,10 +140,6 @@ struct blk_mq_hw_ctx { * shared across request queues. */ atomic_t nr_active; - /** - * @elevator_queued: Number of queued requests on hctx. - */ - atomic_t elevator_queued; /** @cpuhp_online: List to store request if CPU is going to die */ struct hlist_node cpuhp_online; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07cb5d15e743..564ebf91793e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -22,6 +22,7 @@ #include #include #include +#include struct bpf_verifier_env; struct bpf_verifier_log; @@ -563,7 +564,8 @@ struct bpf_tramp_progs { * fentry = a set of program to run before calling original function * fexit = a set of program to run after original function */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +struct bpf_tramp_image; +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call); @@ -572,6 +574,8 @@ u64 notrace __bpf_prog_enter(void); void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); void notrace __bpf_prog_enter_sleepable(void); void notrace __bpf_prog_exit_sleepable(void); +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); struct bpf_ksym { unsigned long start; @@ -590,6 +594,18 @@ enum bpf_tramp_prog_type { BPF_TRAMP_REPLACE, /* more than MAX */ }; +struct bpf_tramp_image { + void *image; + struct bpf_ksym ksym; + struct percpu_ref pcref; + void *ip_after_call; + void *ip_epilogue; + union { + struct rcu_head rcu; + struct work_struct work; + }; +}; + struct bpf_trampoline { /* hlist for trampoline_table */ struct hlist_node hlist; @@ -612,9 +628,8 @@ struct bpf_trampoline { /* Number of attached programs. A counter per kind. */ int progs_cnt[BPF_TRAMP_MAX]; /* Executable image of trampoline */ - void *image; + struct bpf_tramp_image *cur_image; u64 selector; - struct bpf_ksym ksym; }; struct bpf_attach_target_info { @@ -698,6 +713,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym); void bpf_image_ksym_del(struct bpf_ksym *ksym); void bpf_ksym_add(struct bpf_ksym *ksym); void bpf_ksym_del(struct bpf_ksym *ksym); +int bpf_jit_charge_modmem(u32 pages); +void bpf_jit_uncharge_modmem(u32 pages); #else static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr) @@ -788,7 +805,6 @@ struct bpf_prog_aux { bool func_proto_unreliable; bool sleepable; bool tail_call_reachable; - enum bpf_tramp_prog_type trampoline_prog_type; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; @@ -1066,7 +1082,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, struct bpf_prog *include_prog, struct bpf_prog_array **new_array); -#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \ ({ \ struct bpf_prog_array_item *_item; \ struct bpf_prog *_prog; \ @@ -1079,7 +1095,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array, goto _out; \ _item = &_array->items[0]; \ while ((_prog = READ_ONCE(_item->prog))) { \ - bpf_cgroup_storage_set(_item->cgroup_storage); \ + if (set_cg_storage) \ + bpf_cgroup_storage_set(_item->cgroup_storage); \ _ret &= func(_prog, ctx); \ _item++; \ } \ @@ -1140,10 +1157,10 @@ _out: \ }) #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true) #define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ - __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false) #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); @@ -1206,8 +1223,6 @@ void bpf_prog_sub(struct bpf_prog *prog, int i); void bpf_prog_inc(struct bpf_prog *prog); struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); -void __bpf_free_used_maps(struct bpf_prog_aux *aux, - struct bpf_map **used_maps, u32 len); void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); @@ -1403,7 +1418,10 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr, union bpf_attr __user *uattr); + +#ifndef CONFIG_BPF_JIT_ALWAYS_ON void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth); +#endif struct btf *bpf_get_btf_vmlinux(void); @@ -1673,6 +1691,9 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +void __bpf_free_used_maps(struct bpf_prog_aux *aux, + struct bpf_map **used_maps, u32 len); + bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); int bpf_prog_offload_compile(struct bpf_prog *prog); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index d0bd226d6bd9..54665952d6ad 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -136,6 +136,7 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC 0x07 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_WIRESPEED_EN 0x0010 +#define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_EN 0x0080 #define MII_BCM54XX_AUXCTL_SHDWSEL_MISC_RGMII_SKEW_EN 0x0100 #define MII_BCM54XX_AUXCTL_MISC_FORCE_AMDIX 0x0200 #define MII_BCM54XX_AUXCTL_MISC_WREN 0x8000 @@ -222,6 +223,9 @@ /* 11111: Mode Control Register */ #define BCM54XX_SHD_MODE 0x1f #define BCM54XX_SHD_INTF_SEL_MASK GENMASK(2, 1) /* INTERF_SEL[1:0] */ +#define BCM54XX_SHD_INTF_SEL_RGMII 0x02 +#define BCM54XX_SHD_INTF_SEL_SGMII 0x04 +#define BCM54XX_SHD_INTF_SEL_GBIC 0x06 #define BCM54XX_SHD_MODE_1000BX BIT(0) /* Enable 1000-X registers */ /* diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index fc61cf4eff1c..ce7393d397e1 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -49,8 +49,12 @@ static inline void can_skb_reserve(struct sk_buff *skb) static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) { - if (sk) { - sock_hold(sk); + /* If the socket has already been closed by user space, the + * refcount may already be 0 (and the socket will be freed + * after the last TX skb has been freed). So only increase + * socket refcount if the refcount is > 0. + */ + if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb->destructor = sock_efree; skb->sk = sk; } diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 98cff1b4b088..189149de77a9 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -41,6 +41,12 @@ #define __no_sanitize_thread #endif +#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#define __HAVE_BUILTIN_BSWAP16__ +#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */ + #if __has_feature(undefined_behavior_sanitizer) /* GCC does not have __SANITIZE_UNDEFINED__ */ #define __no_sanitize_undefined \ diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ef90e07c9635..e3abd1f8646a 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -151,9 +151,12 @@ * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual * declaration) is used to ensure that the crypto_tfm context structure is * aligned correctly for the given architecture so that there are no alignment - * faults for C data types. In particular, this is required on platforms such - * as arm where pointers are 32-bit aligned but there are data types such as - * u64 which require 64-bit alignment. + * faults for C data types. On architectures that support non-cache coherent + * DMA, such as ARM or arm64, it also takes into account the minimal alignment + * that is required to ensure that the context struct member does not share any + * cachelines with the rest of the struct. This is needed to ensure that cache + * maintenance for non-coherent DMA (cache invalidation in particular) does not + * affect data that may be accessed by the CPU concurrently. */ #define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 61a66fb8ebb3..50cc070cb1f7 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -246,7 +246,11 @@ struct target_type { #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) /* - * Indicates that a target supports host-managed zoned block devices. + * Indicates support for zoned block devices: + * - DM_TARGET_ZONED_HM: the target also supports host-managed zoned + * block devices but does not support combining different zoned models. + * - DM_TARGET_MIXED_ZONED_MODEL: the target supports combining multiple + * devices with different zoned models. */ #define DM_TARGET_ZONED_HM 0x00000040 #define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM) @@ -257,6 +261,15 @@ struct target_type { #define DM_TARGET_NOWAIT 0x00000080 #define dm_target_supports_nowait(type) ((type)->features & DM_TARGET_NOWAIT) +#ifdef CONFIG_BLK_DEV_ZONED +#define DM_TARGET_MIXED_ZONED_MODEL 0x00000200 +#define dm_target_supports_mixed_zoned_model(type) \ + ((type)->features & DM_TARGET_MIXED_ZONED_MODEL) +#else +#define DM_TARGET_MIXED_ZONED_MODEL 0x00000000 +#define dm_target_supports_mixed_zoned_model(type) (false) +#endif + struct dm_target { struct dm_table *table; struct target_type *type; @@ -325,6 +338,11 @@ struct dm_target { * whether or not its underlying devices have support. */ bool discards_supported:1; + + /* + * Set if we need to limit the number of in-flight bios when swapping. + */ + bool limit_swap_bios:1; }; void *dm_per_bio_data(struct bio *bio, size_t data_size); diff --git a/include/linux/eeprom_93xx46.h b/include/linux/eeprom_93xx46.h index eec7928ff8fe..99580c22f91a 100644 --- a/include/linux/eeprom_93xx46.h +++ b/include/linux/eeprom_93xx46.h @@ -16,6 +16,8 @@ struct eeprom_93xx46_platform_data { #define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ (1 << 0) /* Instructions such as EWEN are (addrlen + 2) in length. */ #define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH (1 << 1) +/* Add extra cycle after address during a read */ +#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE BIT(2) /* * optional hooks to control additional logic diff --git a/include/linux/efi.h b/include/linux/efi.h index 763b816ba19c..119262585e9b 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -72,8 +72,10 @@ typedef void *efi_handle_t; */ typedef guid_t efi_guid_t __aligned(__alignof__(u32)); -#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \ - GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7) +#define EFI_GUID(a, b, c, d...) (efi_guid_t){ { \ + (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff, \ + (b) & 0xff, ((b) >> 8) & 0xff, \ + (c) & 0xff, ((c) >> 8) & 0xff, d } } /* * Generic EFI table header diff --git a/include/linux/elevator.h b/include/linux/elevator.h index bacc40a0bdf3..1fe8e105b83b 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -172,6 +172,8 @@ extern struct request *elv_rb_find(struct rb_root *, sector_t); /* Supports zoned block devices sequential write constraint */ #define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0) +/* Supports scheduling on multiple hardware queues */ +#define ELEVATOR_F_MQ_AWARE (1U << 1) #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h index 9b93f8584ff7..8b2b1d68b954 100644 --- a/include/linux/entry-kvm.h +++ b/include/linux/entry-kvm.h @@ -46,6 +46,20 @@ static inline int arch_xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu, */ int xfer_to_guest_mode_handle_work(struct kvm_vcpu *vcpu); +/** + * xfer_to_guest_mode_prepare - Perform last minute preparation work that + * need to be handled while IRQs are disabled + * upon entering to guest. + * + * Has to be invoked with interrupts disabled before the last call + * to xfer_to_guest_mode_work_pending(). + */ +static inline void xfer_to_guest_mode_prepare(void) +{ + lockdep_assert_irqs_disabled(); + rcu_nocb_flush_deferred_wakeup(); +} + /** * __xfer_to_guest_mode_work_pending - Check if work is pending * diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 0350393465d4..593322c946e6 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -18,7 +18,7 @@ struct file; #ifdef CONFIG_EPOLL -#ifdef CONFIG_CHECKPOINT_RESTORE +#ifdef CONFIG_KCMP struct file *get_epoll_tfile_raw_ptr(struct file *file, int tfd, unsigned long toff); #endif diff --git a/include/linux/filter.h b/include/linux/filter.h index 29c27656165b..5edf2b660881 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -886,7 +886,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #define __bpf_call_base_args \ ((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \ - __bpf_call_base) + (void *)__bpf_call_base) struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); void bpf_jit_compile(struct bpf_prog *prog); diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index ef49307611d2..c73b25bc9213 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -674,6 +674,8 @@ struct acpi_gpio_mapping { * get GpioIo type explicitly, this quirk may be used. */ #define ACPI_GPIO_QUIRK_ONLY_GPIOIO BIT(1) +/* Use given pin as an absolute GPIO number in the system */ +#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER BIT(2) unsigned int quirks; }; diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 2ad6e92f124a..0bff345c4bc6 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -113,6 +113,11 @@ static inline bool hugetlb_cgroup_disabled(void) return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } +static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) +{ + css_put(&h_cg->css); +} + extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr); extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages, @@ -138,7 +143,8 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages); + unsigned long nr_pages, + bool region_del); extern void hugetlb_cgroup_file_init(void) __init; extern void hugetlb_cgroup_migrate(struct page *oldhpage, @@ -147,7 +153,8 @@ extern void hugetlb_cgroup_migrate(struct page *oldhpage, #else static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages) + unsigned long nr_pages, + bool region_del) { } @@ -185,6 +192,10 @@ static inline bool hugetlb_cgroup_disabled(void) return true; } +static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg) +{ +} + static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, struct hugetlb_cgroup **ptr) { diff --git a/include/linux/icmpv6.h b/include/linux/icmpv6.h index 1b3371ae8193..9055cb380ee2 100644 --- a/include/linux/icmpv6.h +++ b/include/linux/icmpv6.h @@ -3,6 +3,7 @@ #define _LINUX_ICMPV6_H #include +#include #include static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) @@ -15,13 +16,16 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb) #if IS_ENABLED(CONFIG_IPV6) typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr); -#if IS_BUILTIN(CONFIG_IPV6) + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm); void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr); -static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm); +#if IS_BUILTIN(CONFIG_IPV6) +static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm) { - icmp6_send(skb, type, code, info, NULL); + icmp6_send(skb, type, code, info, NULL, parm); } static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn) { @@ -34,18 +38,28 @@ static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) return 0; } #else -extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info); +extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm); extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn); extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn); #endif +static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +{ + __icmpv6_send(skb, type, code, info, IP6CB(skb)); +} + int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, unsigned int data_len); #if IS_ENABLED(CONFIG_NF_NAT) void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info); #else -#define icmpv6_ndo_send icmpv6_send +static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) +{ + struct inet6_skb_parm parm = { 0 }; + __icmpv6_send(skb_in, type, code, info, &parm); +} #endif #else diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 96556c64c95d..10c94a3936ca 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -43,13 +43,14 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, if (likely(success)) { struct vlan_pcpu_stats *pcpu_stats; - pcpu_stats = this_cpu_ptr(vlan->pcpu_stats); + pcpu_stats = get_cpu_ptr(vlan->pcpu_stats); u64_stats_update_begin(&pcpu_stats->syncp); pcpu_stats->rx_packets++; pcpu_stats->rx_bytes += len; if (multicast) pcpu_stats->rx_multicast++; u64_stats_update_end(&pcpu_stats->syncp); + put_cpu_ptr(vlan->pcpu_stats); } else { this_cpu_inc(vlan->pcpu_stats->rx_errors); } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index efa96263b81b..d63d3e9cc7b6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -170,7 +170,7 @@ enum iommu_dev_features { * struct iommu_iotlb_gather - Range information for a pending IOTLB flush * * @start: IOVA representing the start of the range to be flushed - * @end: IOVA representing the end of the range to be flushed (exclusive) + * @end: IOVA representing the end of the range to be flushed (inclusive) * @pgsize: The interval at which to perform the flush * * This structure is intended to be updated by multiple calls to the @@ -538,7 +538,7 @@ static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, struct iommu_iotlb_gather *gather, unsigned long iova, size_t size) { - unsigned long start = iova, end = start + size; + unsigned long start = iova, end = start + size - 1; /* * If the new page is disjoint from the current range or is mapped at diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index dda61d150a13..f514a7dd8c9c 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -84,7 +84,6 @@ struct ipv6_params { __s32 autoconf; }; extern struct ipv6_params ipv6_defaults; -#include #include #include diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 891b323266df..adb4d3b761b0 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -159,6 +159,7 @@ static inline void generic_handle_irq_desc(struct irq_desc *desc) } int generic_handle_irq(unsigned int irq); +int generic_dispatch_irq(unsigned int irq); #ifdef CONFIG_HANDLE_DOMAIN_IRQ /* diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 9e93bef52968..5f61389f5f36 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -300,6 +300,11 @@ struct kimage { /* Information for loading purgatory */ struct purgatory_info purgatory_info; #endif + +#ifdef CONFIG_IMA_KEXEC + /* Virtual address of IMA measurement buffer for kexec syscall */ + void *ima_buffer; +#endif }; /* kexec interface functions */ diff --git a/include/linux/key.h b/include/linux/key.h index 0f2e24f13c2b..eed3ce139a32 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -289,6 +289,7 @@ extern struct key *key_alloc(struct key_type *type, #define KEY_ALLOC_BUILT_IN 0x0004 /* Key is built into kernel */ #define KEY_ALLOC_BYPASS_RESTRICTION 0x0008 /* Override the check on restricted keyrings */ #define KEY_ALLOC_UID_KEYRING 0x0010 /* allocating a user or user session keyring */ +#define KEY_ALLOC_SET_KEEP 0x0020 /* Set the KEEP flag on the key/keyring */ extern void key_revoke(struct key *key); extern void key_invalidate(struct key *key); diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 0d6cf64c8bb1..3c755f6eaefd 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -360,9 +360,11 @@ extern atomic_t kgdb_active; extern bool dbg_is_early; extern void __init dbg_late_init(void); extern void kgdb_panic(const char *msg); +extern void kgdb_free_init_mem(void); #else /* ! CONFIG_KGDB */ #define in_dbg_master() (0) #define dbg_late_init() static inline void kgdb_panic(const char *msg) {} +static inline void kgdb_free_init_mem(void) { } #endif /* ! CONFIG_KGDB */ #endif /* _KGDB_H_ */ diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h index c941b7377321..2fcc01891b47 100644 --- a/include/linux/khugepaged.h +++ b/include/linux/khugepaged.h @@ -3,6 +3,7 @@ #define _LINUX_KHUGEPAGED_H #include /* MMF_VM_HUGEPAGE */ +#include #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -57,6 +58,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma, { if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags)) if ((khugepaged_always() || + (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) || (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) && !(vm_flags & VM_NOHUGEPAGE) && !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 161e8164abcf..a5993f7b4345 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -19,6 +19,10 @@ struct stable_node; struct mem_cgroup; #ifdef CONFIG_KSM +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags); +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags); int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); int __ksm_enter(struct mm_struct *mm); diff --git a/include/linux/memblock.h b/include/linux/memblock.h index b93c44b9121e..4ce9c8f9e684 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size) /* * Set the allocation direction to bottom-up or top-down. */ -static inline void memblock_set_bottom_up(bool enable) +static inline __init_memblock void memblock_set_bottom_up(bool enable) { memblock.bottom_up = enable; } @@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable) * if this is true, that said, memblock will allocate memory * in bottom-up direction. */ -static inline bool memblock_bottom_up(void) +static inline __init_memblock bool memblock_bottom_up(void) { return memblock.bottom_up; } diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eeb0b52203e9..3e1a43c9f664 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -1072,9 +1072,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm, rcu_read_unlock(); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -void mem_cgroup_split_huge_fixup(struct page *head); -#endif +void split_page_memcg(struct page *head, unsigned int nr); #else /* CONFIG_MEMCG */ @@ -1416,7 +1414,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, return 0; } -static inline void mem_cgroup_split_huge_fixup(struct page *head) +static inline void split_page_memcg(struct page *head, unsigned int nr) { } diff --git a/include/linux/memory.h b/include/linux/memory.h index 439a89e758d8..4da95e684e20 100644 --- a/include/linux/memory.h +++ b/include/linux/memory.h @@ -27,9 +27,8 @@ struct memory_block { unsigned long start_section_nr; unsigned long state; /* serialized by the dev->lock */ int online_type; /* for passing data to online routine */ - int phys_device; /* to which fru does this belong? */ - struct device dev; int nid; /* NID for this memory block */ + struct device dev; }; int arch_get_memory_phys_device(unsigned long start_pfn); diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 79c49e7f5c30..f5b464daeeca 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -137,6 +137,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap); void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap); struct dev_pagemap *get_dev_pagemap(unsigned long pfn, struct dev_pagemap *pgmap); +bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn); unsigned long vmem_altmap_offset(struct vmem_altmap *altmap); void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns); @@ -165,6 +166,11 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn, return NULL; } +static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) +{ + return false; +} + static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap) { return 0; diff --git a/include/linux/mfd/rohm-generic.h b/include/linux/mfd/rohm-generic.h index 4283b5b33e04..2b85b9deb03a 100644 --- a/include/linux/mfd/rohm-generic.h +++ b/include/linux/mfd/rohm-generic.h @@ -20,14 +20,12 @@ struct rohm_regmap_dev { struct regmap *regmap; }; -enum { - ROHM_DVS_LEVEL_UNKNOWN, - ROHM_DVS_LEVEL_RUN, - ROHM_DVS_LEVEL_IDLE, - ROHM_DVS_LEVEL_SUSPEND, - ROHM_DVS_LEVEL_LPSR, - ROHM_DVS_LEVEL_MAX = ROHM_DVS_LEVEL_LPSR, -}; +#define ROHM_DVS_LEVEL_RUN BIT(0) +#define ROHM_DVS_LEVEL_IDLE BIT(1) +#define ROHM_DVS_LEVEL_SUSPEND BIT(2) +#define ROHM_DVS_LEVEL_LPSR BIT(3) +#define ROHM_DVS_LEVEL_VALID_AMOUNT 4 +#define ROHM_DVS_LEVEL_UNKNOWN 0 /** * struct rohm_dvs_config - dynamic voltage scaling register descriptions diff --git a/include/linux/mm.h b/include/linux/mm.h index ecdf8a8cd6ae..992c18d5e85d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1431,16 +1431,28 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid) #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) +/* + * KASAN per-page tags are stored xor'ed with 0xff. This allows to avoid + * setting tags for all pages to native kernel tag value 0xff, as the default + * value 0x00 maps to 0xff. + */ + static inline u8 page_kasan_tag(const struct page *page) { - if (kasan_enabled()) - return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; - return 0xff; + u8 tag = 0xff; + + if (kasan_enabled()) { + tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK; + tag ^= 0xff; + } + + return tag; } static inline void page_kasan_tag_set(struct page *page, u8 tag) { if (kasan_enabled()) { + tag ^= 0xff; page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT); page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT; } @@ -1658,9 +1670,11 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); +int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, + struct mmu_notifier_range *range, pte_t **ptepp, + pmd_t **pmdpp, spinlock_t **ptlp); int follow_pte(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, - spinlock_t **ptlp); + pte_t **ptepp, spinlock_t **ptlp); int follow_pfn(struct vm_area_struct *vma, unsigned long address, unsigned long *pfn); int follow_phys(struct vm_area_struct *vma, unsigned long address, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 07d9acb5b19c..61c77cfff8c2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -23,6 +23,7 @@ #endif #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1)) +#define INIT_PASID 0 struct address_space; struct mem_cgroup; diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index b8200782dede..1a6a9eb6d3fa 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -169,11 +169,11 @@ struct mmu_notifier_ops { * the last refcount is dropped. * * If blockable argument is set to false then the callback cannot - * sleep and has to return with -EAGAIN. 0 should be returned - * otherwise. Please note that if invalidate_range_start approves - * a non-blocking behavior then the same applies to - * invalidate_range_end. - * + * sleep and has to return with -EAGAIN if sleeping would be required. + * 0 should be returned otherwise. Please note that notifiers that can + * fail invalidate_range_start are not allowed to implement + * invalidate_range_end, as there is no mechanism for informing the + * notifier that its start failed. */ int (*invalidate_range_start)(struct mmu_notifier *subscription, const struct mmu_notifier_range *range); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index dcd185cbfe79..4d671fba3cab 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -185,7 +185,7 @@ extern void mutex_lock_io(struct mutex *lock); # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock) # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock) # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock) -# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock) +# define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock) #endif /* diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5ff27c12ce68..fb79ac497794 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3918,6 +3918,9 @@ int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr, struct netlink_ext_ack *extack); int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, struct netlink_ext_ack *extack); +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack); +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_item_id *ppid); diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 8ebb64193757..8ec48466410a 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info __rcu *private; + struct xt_table_info *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void) * since addend is most likely 1 */ __this_cpu_add(xt_recseq.sequence, addend); - smp_wmb(); + smp_mb(); return addend; } @@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table); - #ifdef CONFIG_COMPAT #include diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d5570deff400..b032f094a782 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -559,7 +559,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return pgoff; } -/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */ struct wait_page_key { struct page *page; int bit_nr; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9a38f579bc76..419a4d77de00 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -606,6 +606,7 @@ struct swevent_hlist { #define PERF_ATTACH_TASK 0x04 #define PERF_ATTACH_TASK_DATA 0x08 #define PERF_ATTACH_ITRACE 0x10 +#define PERF_ATTACH_SCHED_CB 0x20 struct perf_cgroup; struct perf_buffer; @@ -872,6 +873,7 @@ struct perf_cpu_context { struct list_head cgrp_cpuctx_entry; #endif + struct list_head sched_cb_entry; int sched_cb_usage; int online; diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 8fcdfa52eb4b..dad92f9e4eac 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -912,6 +912,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma, #define pgprot_device pgprot_noncached #endif +#ifndef pgprot_mhp +#define pgprot_mhp(prot) (prot) +#endif + #ifdef CONFIG_MMU #ifndef pgprot_modify #define pgprot_modify pgprot_modify diff --git a/include/linux/phy.h b/include/linux/phy.h index 9effb511acde..d0e64f3b53b9 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -499,6 +499,7 @@ struct macsec_ops; * * @speed: Current link speed * @duplex: Current duplex + * @port: Current port * @pause: Current pause * @asym_pause: Current asymmetric pause * @supported: Combined MAC/PHY supported linkmodes @@ -577,6 +578,7 @@ struct phy_device { */ int speed; int duplex; + int port; int pause; int asym_pause; u8 master_slave_get; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index fd02c5fa60cb..36c2119de702 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -110,8 +110,10 @@ static inline void rcu_user_exit(void) { } #ifdef CONFIG_RCU_NOCB_CPU void rcu_init_nohz(void); +void rcu_nocb_flush_deferred_wakeup(void); #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static inline void rcu_init_nohz(void) { } +static inline void rcu_nocb_flush_deferred_wakeup(void) { } #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /** diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index 1bbd3014f906..71902f41c919 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -147,6 +147,9 @@ enum { #define BUCK6_FPWM 0x04 #define BUCK6_ENMODE_MASK 0x03 +/* PCA9450_REG_BUCK123_PRESET_EN bit */ +#define BUCK123_PRESET_EN 0x80 + /* PCA9450_BUCK1OUT_DVS0 bits */ #define BUCK1OUT_DVS0_MASK 0x7F #define BUCK1OUT_DVS0_DEFAULT 0x14 @@ -216,4 +219,11 @@ enum { #define IRQ_THERM_105 0x02 #define IRQ_THERM_125 0x01 +/* PCA9450_REG_RESET_CTRL bits */ +#define WDOG_B_CFG_MASK 0xC0 +#define WDOG_B_CFG_NONE 0x00 +#define WDOG_B_CFG_WARM 0x40 +#define WDOG_B_CFG_COLD_LDO12 0x80 +#define WDOG_B_CFG_COLD 0xC0 + #endif /* __LINUX_REG_PCA9450_H__ */ diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 70085ca1a3fc..def5c62c93b3 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -213,7 +213,8 @@ struct page_vma_mapped_walk { static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw) { - if (pvmw->pte) + /* HugeTLB pte is set to the relevant page table entry without pte_mapped. */ + if (pvmw->pte && !PageHuge(pvmw->page)) pte_unmap(pvmw->pte); if (pvmw->ptl) spin_unlock(pvmw->ptl); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1ae08b8462a4..90b2a0bce11c 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk) * another oom-unkillable task does this it should blame itself. */ rcu_read_lock(); - ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm; + ret = tsk->vfork_done && + rcu_dereference(tsk->real_parent)->mm == tsk->mm; rcu_read_unlock(); return ret; diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 2f7bb92b4c9e..f61e34fbaaea 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -664,10 +664,7 @@ typedef struct { * seqcount_latch_init() - runtime initializer for seqcount_latch_t * @s: Pointer to the seqcount_latch_t instance */ -static inline void seqcount_latch_init(seqcount_latch_t *s) -{ - seqcount_init(&s->seqcount); -} +#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount) /** * raw_read_seqcount_latch() - pick even/odd latch data copy diff --git a/include/linux/soundwire/sdw.h b/include/linux/soundwire/sdw.h index f0b01b728640..d08039d65825 100644 --- a/include/linux/soundwire/sdw.h +++ b/include/linux/soundwire/sdw.h @@ -1005,6 +1005,8 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus); int sdw_read(struct sdw_slave *slave, u32 addr); int sdw_write(struct sdw_slave *slave, u32 addr, u8 value); +int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value); +int sdw_read_no_pm(struct sdw_slave *slave, u32 addr); int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val); diff --git a/include/linux/static_call.h b/include/linux/static_call.h index 695da4c9b338..04e6042d252d 100644 --- a/include/linux/static_call.h +++ b/include/linux/static_call.h @@ -107,26 +107,10 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool #define STATIC_CALL_TRAMP_ADDR(name) &STATIC_CALL_TRAMP(name) -/* - * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from - * the symbol table so that objtool can reference it when it generates the - * .static_call_sites section. - */ -#define __static_call(name) \ -({ \ - __ADDRESSABLE(STATIC_CALL_KEY(name)); \ - &STATIC_CALL_TRAMP(name); \ -}) - #else #define STATIC_CALL_TRAMP_ADDR(name) NULL #endif - -#define DECLARE_STATIC_CALL(name, func) \ - extern struct static_call_key STATIC_CALL_KEY(name); \ - extern typeof(func) STATIC_CALL_TRAMP(name); - #define static_call_update(name, func) \ ({ \ BUILD_BUG_ON(!__same_type(*(func), STATIC_CALL_TRAMP(name))); \ @@ -154,6 +138,12 @@ struct static_call_key { }; }; +/* For finding the key associated with a trampoline */ +struct static_call_tramp_key { + s32 tramp; + s32 key; +}; + extern void __static_call_update(struct static_call_key *key, void *tramp, void *func); extern int static_call_mod_init(struct module *mod); extern int static_call_text_reserved(void *start, void *end); @@ -174,17 +164,23 @@ extern int static_call_text_reserved(void *start, void *end); }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) -#define static_call(name) __static_call(name) #define static_call_cond(name) (void)__static_call(name) #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) - #define EXPORT_STATIC_CALL_GPL(name) \ EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) +/* Leave the key unexported, so modules can't change static call targets: */ +#define EXPORT_STATIC_CALL_TRAMP(name) \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)); \ + ARCH_ADD_TRAMP_KEY(name) +#define EXPORT_STATIC_CALL_TRAMP_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)); \ + ARCH_ADD_TRAMP_KEY(name) + #elif defined(CONFIG_HAVE_STATIC_CALL) static inline int static_call_init(void) { return 0; } @@ -207,7 +203,6 @@ struct static_call_key { }; \ ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) -#define static_call(name) __static_call(name) #define static_call_cond(name) (void)__static_call(name) static inline @@ -227,11 +222,16 @@ static inline int static_call_text_reserved(void *start, void *end) #define EXPORT_STATIC_CALL(name) \ EXPORT_SYMBOL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) - #define EXPORT_STATIC_CALL_GPL(name) \ EXPORT_SYMBOL_GPL(STATIC_CALL_KEY(name)); \ EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) +/* Leave the key unexported, so modules can't change static call targets: */ +#define EXPORT_STATIC_CALL_TRAMP(name) \ + EXPORT_SYMBOL(STATIC_CALL_TRAMP(name)) +#define EXPORT_STATIC_CALL_TRAMP_GPL(name) \ + EXPORT_SYMBOL_GPL(STATIC_CALL_TRAMP(name)) + #else /* Generic implementation */ static inline int static_call_init(void) { return 0; } @@ -252,9 +252,6 @@ struct static_call_key { .func = NULL, \ } -#define static_call(name) \ - ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) - static inline void __static_call_nop(void) { } /* diff --git a/include/linux/static_call_types.h b/include/linux/static_call_types.h index 89135bb35bf7..ae5662d368b9 100644 --- a/include/linux/static_call_types.h +++ b/include/linux/static_call_types.h @@ -4,11 +4,13 @@ #include #include +#include #define STATIC_CALL_KEY_PREFIX __SCK__ #define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) #define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) +#define STATIC_CALL_KEY_STR(name) __stringify(STATIC_CALL_KEY(name)) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) @@ -32,4 +34,52 @@ struct static_call_site { s32 key; }; +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name)) + +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __STATIC_CALL_ADDRESSABLE(name) \ + __ADDRESSABLE(STATIC_CALL_KEY(name)) + +#define __static_call(name) \ +({ \ + __STATIC_CALL_ADDRESSABLE(name); \ + __raw_static_call(name); \ +}) + +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ + +#define __STATIC_CALL_ADDRESSABLE(name) +#define __static_call(name) __raw_static_call(name) + +#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ + +#ifdef MODULE +#define __STATIC_CALL_MOD_ADDRESSABLE(name) +#define static_call_mod(name) __raw_static_call(name) +#else +#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name) +#define static_call_mod(name) __static_call(name) +#endif + +#define static_call(name) __static_call(name) + +#else + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 30577c3aecf8..46fb3ebdd16e 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus); #else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */ -static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, +static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { unsigned long flags; @@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, return ret; } -static inline int stop_machine(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus) { return stop_machine_cpuslocked(fn, data, cpus); } -static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, - const struct cpumask *cpus) +static __always_inline int +stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data, + const struct cpumask *cpus) { return stop_machine(fn, data, cpus); } diff --git a/include/linux/swap.h b/include/linux/swap.h index 596bc2f4d9b0..55fe2f5b6f5f 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -482,6 +482,7 @@ struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); extern struct swap_info_struct *get_swap_device(swp_entry_t entry); +sector_t swap_page_sector(struct page *page); static inline void put_swap_device(struct swap_info_struct *si) { diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 2f87377e9af7..ad960923eb9f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -225,7 +225,8 @@ struct tcp_sock { u8 compressed_ack; u8 dup_ack_counter:2, tlp_retrans:1, /* TLP is a retransmission */ - unused:5; + fast_ack_mode:2, /* which fast ack mode ? */ + unused:3; u32 chrono_start; /* Start time in jiffies of a TCP chrono */ u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ diff --git a/include/linux/textsearch.h b/include/linux/textsearch.h index 13770cfe33ad..6673e4d4ac2e 100644 --- a/include/linux/textsearch.h +++ b/include/linux/textsearch.h @@ -23,7 +23,7 @@ struct ts_config; struct ts_state { unsigned int offset; - char cb[40]; + char cb[48]; }; /** diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 9b2158c69275..157762db9d4b 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -11,6 +11,7 @@ #include #include #include +#include #ifdef CONFIG_THREAD_INFO_IN_TASK /* @@ -59,6 +60,18 @@ enum syscall_work_bit { #ifdef __KERNEL__ +#ifndef arch_set_restart_data +#define arch_set_restart_data(restart) do { } while (0) +#endif + +static inline long set_restart_fn(struct restart_block *restart, + long (*fn)(struct restart_block *)) +{ + restart->fn = fn; + arch_set_restart_data(restart); + return -ERESTART_RESTARTBLOCK; +} + #ifndef THREAD_ALIGN #define THREAD_ALIGN THREAD_SIZE #endif diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8f4ff39f51e7..804a3f69bbd9 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -397,6 +397,10 @@ static inline u32 tpm2_rc_value(u32 rc) #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) extern int tpm_is_tpm2(struct tpm_chip *chip); +extern __must_check int tpm_try_get_ops(struct tpm_chip *chip); +extern void tpm_put_ops(struct tpm_chip *chip); +extern ssize_t tpm_transmit_cmd(struct tpm_chip *chip, struct tpm_buf *buf, + size_t min_rsp_body_length, const char *desc); extern int tpm_pcr_read(struct tpm_chip *chip, u32 pcr_idx, struct tpm_digest *digest); extern int tpm_pcr_extend(struct tpm_chip *chip, u32 pcr_idx, @@ -410,7 +414,6 @@ static inline int tpm_is_tpm2(struct tpm_chip *chip) { return -ENODEV; } - static inline int tpm_pcr_read(struct tpm_chip *chip, int pcr_idx, struct tpm_digest *digest) { diff --git a/include/linux/tty_ldisc.h b/include/linux/tty_ldisc.h index b1e6043e9917..572a07976116 100644 --- a/include/linux/tty_ldisc.h +++ b/include/linux/tty_ldisc.h @@ -185,7 +185,8 @@ struct tty_ldisc_ops { void (*close)(struct tty_struct *); void (*flush_buffer)(struct tty_struct *tty); ssize_t (*read)(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr); + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset); ssize_t (*write)(struct tty_struct *tty, struct file *file, const unsigned char *buf, size_t nr); int (*ioctl)(struct tty_struct *tty, struct file *file, diff --git a/include/linux/u64_stats_sync.h b/include/linux/u64_stats_sync.h index c6abb79501b3..e81856c0ba13 100644 --- a/include/linux/u64_stats_sync.h +++ b/include/linux/u64_stats_sync.h @@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p) } #endif +#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) +#define u64_stats_init(syncp) seqcount_init(&(syncp)->seq) +#else static inline void u64_stats_init(struct u64_stats_sync *syncp) { -#if BITS_PER_LONG == 32 && defined(CONFIG_SMP) - seqcount_init(&syncp->seq); -#endif } +#endif static inline void u64_stats_update_begin(struct u64_stats_sync *syncp) { diff --git a/include/linux/usb.h b/include/linux/usb.h index 7d72c4e0713c..d6a41841b93e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -746,6 +746,8 @@ extern int usb_lock_device_for_reset(struct usb_device *udev, extern int usb_reset_device(struct usb_device *dev); extern void usb_queue_reset_device(struct usb_interface *dev); +extern struct device *usb_intf_get_dma_device(struct usb_interface *intf); + #ifdef CONFIG_ACPI extern int usb_acpi_set_power_state(struct usb_device *hdev, int index, bool enable); diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 6b03fdd69d27..712363c7a2e8 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -86,6 +86,8 @@ /* lies about caching, so always sync */ \ US_FLAG(NO_SAME, 0x40000000) \ /* Cannot handle WRITE_SAME */ \ + US_FLAG(SENSE_AFTER_SYNC, 0x80000000) \ + /* Do REQUEST_SENSE after SYNCHRONIZE_CACHE */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 64cf8ebdc4ec..bd29529ac188 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -105,6 +105,8 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type); #ifdef CONFIG_USER_NS +extern int unprivileged_userns_clone; + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { if (ns) @@ -138,6 +140,8 @@ extern bool current_in_userns(const struct user_namespace *target_ns); struct ns_common *ns_get_owner(struct ns_common *ns); #else +#define unprivileged_userns_clone 0 + static inline struct user_namespace *get_user_ns(struct user_namespace *ns) { return &init_user_ns; diff --git a/include/linux/usermode_driver.h b/include/linux/usermode_driver.h index 073a9e0ec07d..ad970416260d 100644 --- a/include/linux/usermode_driver.h +++ b/include/linux/usermode_driver.h @@ -14,5 +14,6 @@ struct umd_info { int umd_load_blob(struct umd_info *info, const void *data, size_t len); int umd_unload_blob(struct umd_info *info); int fork_usermode_driver(struct umd_info *info); +void umd_cleanup_helper(struct umd_info *info); #endif /* __LINUX_USERMODE_DRIVER_H__ */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index e8a924eeea3d..6b5fcfa1e555 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, if (gso_type && skb->network_header) { struct flow_keys_basic keys; - if (!skb->protocol) + if (!skb->protocol) { + __be16 protocol = dev_parse_header_protocol(skb); + virtio_net_hdr_set_proto(skb, hdr); + if (protocol && protocol != skb->protocol) + return -EINVAL; + } retry: if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys, NULL, 0, 0, 0, diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 51bf43076165..e8997010612a 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *pool); * @malloc: allocate mem from a pool. * @free: free mem from a pool. * @shrink: shrink the pool. + * @sleep_mapped: whether zpool driver can sleep during map. * @map: map a handle. * @unmap: unmap a handle. * @total_size: get total size of a pool. @@ -100,6 +101,7 @@ struct zpool_driver { int (*shrink)(void *pool, unsigned int pages, unsigned int *reclaimed); + bool sleep_mapped; void *(*map)(void *pool, unsigned long handle, enum zpool_mapmode mm); void (*unmap)(void *pool, unsigned long handle); @@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_driver *driver); int zpool_unregister_driver(struct zpool_driver *driver); bool zpool_evictable(struct zpool *pool); +bool zpool_can_sleep_mapped(struct zpool *pool); #endif diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 4807ca4d52e0..2a430e713ce5 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -35,7 +35,7 @@ enum zs_mapmode { struct zs_pool_stats { /* How many pages were migrated (freed) */ - unsigned long pages_compacted; + atomic_long_t pages_compacted; }; struct zs_pool; diff --git a/include/linux/zstd.h b/include/linux/zstd.h index 249575e2485f..446ecabcdd02 100644 --- a/include/linux/zstd.h +++ b/include/linux/zstd.h @@ -1,138 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. + * Copyright (c) Yann Collet, Facebook, Inc. * All rights reserved. * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of https://github.com/facebook/zstd) and + * the GPLv2 (found in the COPYING file in the root directory of + * https://github.com/facebook/zstd). You may select, at your option, one of the + * above-listed licenses. */ -#ifndef ZSTD_H -#define ZSTD_H +#ifndef LINUX_ZSTD_H +#define LINUX_ZSTD_H -/* ====== Dependency ======*/ -#include /* size_t */ +/** + * This is a kernel-style API that wraps the upstream zstd API, which cannot be + * used directly because the symbols aren't exported. It exposes the minimal + * functionality which is currently required by users of zstd in the kernel. + * Expose extra functions from lib/zstd/zstd.h as needed. + */ +/* ====== Dependency ====== */ +#include +#include +#include -/*-***************************************************************************** - * Introduction +/* ====== Helper Functions ====== */ +/** + * zstd_compress_bound() - maximum compressed size in worst case scenario + * @src_size: The size of the data to compress. * - * zstd, short for Zstandard, is a fast lossless compression algorithm, - * targeting real-time compression scenarios at zlib-level and better - * compression ratios. The zstd compression library provides in-memory - * compression and decompression functions. The library supports compression - * levels from 1 up to ZSTD_maxCLevel() which is 22. Levels >= 20, labeled - * ultra, should be used with caution, as they require more memory. - * Compression can be done in: - * - a single step, reusing a context (described as Explicit memory management) - * - unbounded multiple steps (described as Streaming compression) - * The compression ratio achievable on small data can be highly improved using - * compression with a dictionary in: - * - a single step (described as Simple dictionary API) - * - a single step, reusing a dictionary (described as Fast dictionary API) - ******************************************************************************/ - -/*====== Helper functions ======*/ + * Return: The maximum compressed size in the worst case scenario. + */ +size_t zstd_compress_bound(size_t src_size); /** - * enum ZSTD_ErrorCode - zstd error codes + * zstd_is_error() - tells if a size_t function result is an error code + * @code: The function result to check for error. * - * Functions that return size_t can be checked for errors using ZSTD_isError() - * and the ZSTD_ErrorCode can be extracted using ZSTD_getErrorCode(). + * Return: Non-zero iff the code is an error. + */ +unsigned int zstd_is_error(size_t code); + +/** + * enum zstd_error_code - zstd error codes */ -typedef enum { - ZSTD_error_no_error, - ZSTD_error_GENERIC, - ZSTD_error_prefix_unknown, - ZSTD_error_version_unsupported, - ZSTD_error_parameter_unknown, - ZSTD_error_frameParameter_unsupported, - ZSTD_error_frameParameter_unsupportedBy32bits, - ZSTD_error_frameParameter_windowTooLarge, - ZSTD_error_compressionParameter_unsupported, - ZSTD_error_init_missing, - ZSTD_error_memory_allocation, - ZSTD_error_stage_wrong, - ZSTD_error_dstSize_tooSmall, - ZSTD_error_srcSize_wrong, - ZSTD_error_corruption_detected, - ZSTD_error_checksum_wrong, - ZSTD_error_tableLog_tooLarge, - ZSTD_error_maxSymbolValue_tooLarge, - ZSTD_error_maxSymbolValue_tooSmall, - ZSTD_error_dictionary_corrupted, - ZSTD_error_dictionary_wrong, - ZSTD_error_dictionaryCreation_failed, - ZSTD_error_maxCode -} ZSTD_ErrorCode; +typedef ZSTD_ErrorCode zstd_error_code; /** - * ZSTD_maxCLevel() - maximum compression level available + * zstd_get_error_code() - translates an error function result to an error code + * @code: The function result for which zstd_is_error(code) is true. * - * Return: Maximum compression level available. + * Return: A unique error code for this error. */ -int ZSTD_maxCLevel(void); +zstd_error_code zstd_get_error_code(size_t code); + /** - * ZSTD_compressBound() - maximum compressed size in worst case scenario - * @srcSize: The size of the data to compress. + * zstd_get_error_name() - translates an error function result to a string + * @code: The function result for which zstd_is_error(code) is true. * - * Return: The maximum compressed size in the worst case scenario. + * Return: An error string corresponding to the error code. */ -size_t ZSTD_compressBound(size_t srcSize); +const char *zstd_get_error_name(size_t code); + /** - * ZSTD_isError() - tells if a size_t function result is an error code - * @code: The function result to check for error. + * zstd_min_clevel() - minimum allowed compression level * - * Return: Non-zero iff the code is an error. + * Return: The minimum allowed compression level. */ -static __attribute__((unused)) unsigned int ZSTD_isError(size_t code) -{ - return code > (size_t)-ZSTD_error_maxCode; -} +int zstd_min_clevel(void); + /** - * ZSTD_getErrorCode() - translates an error function result to a ZSTD_ErrorCode - * @functionResult: The result of a function for which ZSTD_isError() is true. + * zstd_max_clevel() - maximum allowed compression level * - * Return: The ZSTD_ErrorCode corresponding to the functionResult or 0 - * if the functionResult isn't an error. + * Return: The maximum allowed compression level. */ -static __attribute__((unused)) ZSTD_ErrorCode ZSTD_getErrorCode( - size_t functionResult) -{ - if (!ZSTD_isError(functionResult)) - return (ZSTD_ErrorCode)0; - return (ZSTD_ErrorCode)(0 - functionResult); -} +int zstd_max_clevel(void); + +/* ====== Parameter Selection ====== */ /** - * enum ZSTD_strategy - zstd compression search strategy + * enum zstd_strategy - zstd compression search strategy * - * From faster to stronger. + * From faster to stronger. See zstd_lib.h. */ -typedef enum { - ZSTD_fast, - ZSTD_dfast, - ZSTD_greedy, - ZSTD_lazy, - ZSTD_lazy2, - ZSTD_btlazy2, - ZSTD_btopt, - ZSTD_btopt2 -} ZSTD_strategy; +typedef ZSTD_strategy zstd_strategy; /** - * struct ZSTD_compressionParameters - zstd compression parameters + * struct zstd_compression_parameters - zstd compression parameters * @windowLog: Log of the largest match distance. Larger means more * compression, and more memory needed during decompression. - * @chainLog: Fully searched segment. Larger means more compression, slower, - * and more memory (useless for fast). + * @chainLog: Fully searched segment. Larger means more compression, + * slower, and more memory (useless for fast). * @hashLog: Dispatch table. Larger means more compression, * slower, and more memory. * @searchLog: Number of searches. Larger means more compression and slower. @@ -141,1017 +100,348 @@ typedef enum { * @targetLength: Acceptable match size for optimal parser (only). Larger means * more compression, and slower. * @strategy: The zstd compression strategy. + * + * See zstd_lib.h. */ -typedef struct { - unsigned int windowLog; - unsigned int chainLog; - unsigned int hashLog; - unsigned int searchLog; - unsigned int searchLength; - unsigned int targetLength; - ZSTD_strategy strategy; -} ZSTD_compressionParameters; +typedef ZSTD_compressionParameters zstd_compression_parameters; /** - * struct ZSTD_frameParameters - zstd frame parameters - * @contentSizeFlag: Controls whether content size will be present in the frame - * header (when known). - * @checksumFlag: Controls whether a 32-bit checksum is generated at the end - * of the frame for error detection. - * @noDictIDFlag: Controls whether dictID will be saved into the frame header - * when using dictionary compression. + * struct zstd_frame_parameters - zstd frame parameters + * @contentSizeFlag: Controls whether content size will be present in the + * frame header (when known). + * @checksumFlag: Controls whether a 32-bit checksum is generated at the + * end of the frame for error detection. + * @noDictIDFlag: Controls whether dictID will be saved into the frame + * header when using dictionary compression. * - * The default value is all fields set to 0. + * The default value is all fields set to 0. See zstd_lib.h. */ -typedef struct { - unsigned int contentSizeFlag; - unsigned int checksumFlag; - unsigned int noDictIDFlag; -} ZSTD_frameParameters; +typedef ZSTD_frameParameters zstd_frame_parameters; /** - * struct ZSTD_parameters - zstd parameters + * struct zstd_parameters - zstd parameters * @cParams: The compression parameters. * @fParams: The frame parameters. */ -typedef struct { - ZSTD_compressionParameters cParams; - ZSTD_frameParameters fParams; -} ZSTD_parameters; +typedef ZSTD_parameters zstd_parameters; /** - * ZSTD_getCParams() - returns ZSTD_compressionParameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. + * zstd_get_params() - returns zstd_parameters for selected level + * @level: The compression level + * @estimated_src_size: The estimated source size to compress or 0 + * if unknown. * - * Return: The selected ZSTD_compressionParameters. + * Return: The selected zstd_parameters. */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size); -/** - * ZSTD_getParams() - returns ZSTD_parameters for selected level - * @compressionLevel: The compression level from 1 to ZSTD_maxCLevel(). - * @estimatedSrcSize: The estimated source size to compress or 0 if unknown. - * @dictSize: The dictionary size or 0 if a dictionary isn't being used. - * - * The same as ZSTD_getCParams() except also selects the default frame - * parameters (all zero). - * - * Return: The selected ZSTD_parameters. - */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, - unsigned long long estimatedSrcSize, size_t dictSize); +/* ====== Single-pass Compression ====== */ -/*-************************************* - * Explicit memory management - **************************************/ +typedef ZSTD_CCtx zstd_cctx; /** - * ZSTD_CCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_CCtx - * @cParams: The compression parameters to be used for compression. + * zstd_cctx_workspace_bound() - max memory needed to initialize a zstd_cctx + * @parameters: The compression parameters to be used. * * If multiple compression parameters might be used, the caller must call - * ZSTD_CCtxWorkspaceBound() for each set of parameters and use the maximum + * zstd_cctx_workspace_bound() for each set of parameters and use the maximum * size. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCCtx(). + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_cctx(). */ -size_t ZSTD_CCtxWorkspaceBound(ZSTD_compressionParameters cParams); +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *parameters); /** - * struct ZSTD_CCtx - the zstd compression context - * - * When compressing many times it is recommended to allocate a context just once - * and reuse it for each successive compression operation. - */ -typedef struct ZSTD_CCtx_s ZSTD_CCtx; -/** - * ZSTD_initCCtx() - initialize a zstd compression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_CCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A compression context emplaced into workspace. - */ -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_compressCCtx() - compress src into dst - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, ZSTD_parameters params); - -/** - * ZSTD_DCtxWorkspaceBound() - amount of memory needed to initialize a ZSTD_DCtx - * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDCtx(). - */ -size_t ZSTD_DCtxWorkspaceBound(void); - -/** - * struct ZSTD_DCtx - the zstd decompression context - * - * When decompressing many times it is recommended to allocate a context just - * once and reuse it for each successive decompression operation. - */ -typedef struct ZSTD_DCtx_s ZSTD_DCtx; -/** - * ZSTD_initDCtx() - initialize a zstd decompression context - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Use ZSTD_DCtxWorkspaceBound() to - * determine how large the workspace must be. - * - * Return: A decompression context emplaced into workspace. - */ -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize); - -/** - * ZSTD_decompressDCtx() - decompress zstd compressed src into dst - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompressDCtx(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - -/*-************************ - * Simple dictionary API - **************************/ - -/** - * ZSTD_compress_usingDict() - compress src into dst using a dictionary - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(params.cParams). - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @dict: The dictionary to use for compression. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a predefined dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params); - -/** - * ZSTD_decompress_usingDict() - decompress src into dst using a dictionary - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @dict: The dictionary to use for decompression. The same dictionary - * must've been used to compress the data. - * @dictSize: The size of the dictionary. - * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *ctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const void *dict, size_t dictSize); - -/*-************************** - * Fast dictionary API - ***************************/ - -/** - * ZSTD_CDictWorkspaceBound() - memory needed to initialize a ZSTD_CDict - * @cParams: The compression parameters to be used for compression. + * zstd_init_cctx() - initialize a zstd compression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_cctx_workspace_bound() to + * determine how large the workspace must be. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCDict(). - */ -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CDict - a digested dictionary to be used for compression + * Return: A zstd compression context or NULL on error. */ -typedef struct ZSTD_CDict_s ZSTD_CDict; +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size); /** - * ZSTD_initCDict() - initialize a digested dictionary for compression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_CDict so it must outlive the returned ZSTD_CDict. - * @dictSize: The size of the dictionary. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * @workspace: The workspace. It must outlive the returned ZSTD_CDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_CDictWorkspaceBound(params.cParams). + * zstd_compress_cctx() - compress src into dst with the initialized parameters + * @cctx: The context. Must have been initialized with zstd_init_cctx(). + * @dst: The buffer to compress src into. + * @dst_capacity: The size of the destination buffer. May be any size, but + * ZSTD_compressBound(srcSize) is guaranteed to be large enough. + * @src: The data to compress. + * @src_size: The size of the data to compress. + * @parameters: The compression parameters to be used. * - * When compressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_CDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_CDict. - * - * Return: The digested dictionary emplaced into workspace. + * Return: The compressed size or an error, which can be checked using + * zstd_is_error(). */ -ZSTD_CDict *ZSTD_initCDict(const void *dictBuffer, size_t dictSize, - ZSTD_parameters params, void *workspace, size_t workspaceSize); +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters); -/** - * ZSTD_compress_usingCDict() - compress src into dst using a ZSTD_CDict - * @ctx: The context. Must have been initialized with a workspace at - * least as large as ZSTD_CCtxWorkspaceBound(cParams) where - * cParams are the compression parameters used to initialize the - * cdict. - * @dst: The buffer to compress src into. - * @dstCapacity: The size of the destination buffer. May be any size, but - * ZSTD_compressBound(srcSize) is guaranteed to be large enough. - * @src: The data to compress. - * @srcSize: The size of the data to compress. - * @cdict: The digested dictionary to use for compression. - * @params: The parameters to use for compression. See ZSTD_getParams(). - * - * Compression using a digested dictionary. The same dictionary must be used - * during decompression. - * - * Return: The compressed size or an error, which can be checked using - * ZSTD_isError(). - */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize, const ZSTD_CDict *cdict); +/* ====== Single-pass Decompression ====== */ +typedef ZSTD_DCtx zstd_dctx; /** - * ZSTD_DDictWorkspaceBound() - memory needed to initialize a ZSTD_DDict + * zstd_dctx_workspace_bound() - max memory needed to initialize a zstd_dctx * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDDict(). - */ -size_t ZSTD_DDictWorkspaceBound(void); - -/** - * struct ZSTD_DDict - a digested dictionary to be used for decompression + * Return: A lower bound on the size of the workspace that is passed to + * zstd_init_dctx(). */ -typedef struct ZSTD_DDict_s ZSTD_DDict; +size_t zstd_dctx_workspace_bound(void); /** - * ZSTD_initDDict() - initialize a digested dictionary for decompression - * @dictBuffer: The dictionary to digest. The buffer is referenced by the - * ZSTD_DDict so it must outlive the returned ZSTD_DDict. - * @dictSize: The size of the dictionary. - * @workspace: The workspace. It must outlive the returned ZSTD_DDict. - * @workspaceSize: The workspace size. Must be at least - * ZSTD_DDictWorkspaceBound(). - * - * When decompressing multiple messages / blocks with the same dictionary it is - * recommended to load it just once. The ZSTD_DDict merely references the - * dictBuffer, so it must outlive the returned ZSTD_DDict. + * zstd_init_dctx() - initialize a zstd decompression context + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. Use zstd_dctx_workspace_bound() to + * determine how large the workspace must be. * - * Return: The digested dictionary emplaced into workspace. + * Return: A zstd decompression context or NULL on error. */ -ZSTD_DDict *ZSTD_initDDict(const void *dictBuffer, size_t dictSize, - void *workspace, size_t workspaceSize); +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size); /** - * ZSTD_decompress_usingDDict() - decompress src into dst using a ZSTD_DDict - * @ctx: The decompression context. - * @dst: The buffer to decompress src into. - * @dstCapacity: The size of the destination buffer. Must be at least as large - * as the decompressed size. If the caller cannot upper bound the - * decompressed size, then it's better to use the streaming API. - * @src: The zstd compressed data to decompress. Multiple concatenated - * frames and skippable frames are allowed. - * @srcSize: The exact size of the data to decompress. - * @ddict: The digested dictionary to use for decompression. The same - * dictionary must've been used to compress the data. + * zstd_decompress_dctx() - decompress zstd compressed src into dst + * @dctx: The decompression context. + * @dst: The buffer to decompress src into. + * @dst_capacity: The size of the destination buffer. Must be at least as large + * as the decompressed size. If the caller cannot upper bound the + * decompressed size, then it's better to use the streaming API. + * @src: The zstd compressed data to decompress. Multiple concatenated + * frames and skippable frames are allowed. + * @src_size: The exact size of the data to decompress. * - * Return: The decompressed size or an error, which can be checked using - * ZSTD_isError(). + * Return: The decompressed size or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, - size_t dstCapacity, const void *src, size_t srcSize, - const ZSTD_DDict *ddict); +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size); - -/*-************************** - * Streaming - ***************************/ +/* ====== Streaming Buffers ====== */ /** - * struct ZSTD_inBuffer - input buffer for streaming + * struct zstd_in_buffer - input buffer for streaming * @src: Start of the input buffer. * @size: Size of the input buffer. * @pos: Position where reading stopped. Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_inBuffer_s { - const void *src; - size_t size; - size_t pos; -} ZSTD_inBuffer; +typedef ZSTD_inBuffer zstd_in_buffer; /** - * struct ZSTD_outBuffer - output buffer for streaming + * struct zstd_out_buffer - output buffer for streaming * @dst: Start of the output buffer. * @size: Size of the output buffer. * @pos: Position where writing stopped. Will be updated. * Necessarily 0 <= pos <= size. + * + * See zstd_lib.h. */ -typedef struct ZSTD_outBuffer_s { - void *dst; - size_t size; - size_t pos; -} ZSTD_outBuffer; +typedef ZSTD_outBuffer zstd_out_buffer; +/* ====== Streaming Compression ====== */ - -/*-***************************************************************************** - * Streaming compression - HowTo - * - * A ZSTD_CStream object is required to track streaming operation. - * Use ZSTD_initCStream() to initialize a ZSTD_CStream object. - * ZSTD_CStream objects can be reused multiple times on consecutive compression - * operations. It is recommended to re-use ZSTD_CStream in situations where many - * streaming operations will be achieved consecutively. Use one separate - * ZSTD_CStream per thread for parallel execution. - * - * Use ZSTD_compressStream() repetitively to consume input stream. - * The function will automatically update both `pos` fields. - * Note that it may not consume the entire input, in which case `pos < size`, - * and it's up to the caller to present again remaining data. - * It returns a hint for the preferred number of bytes to use as an input for - * the next function call. - * - * At any moment, it's possible to flush whatever data remains within internal - * buffer, using ZSTD_flushStream(). `output->pos` will be updated. There might - * still be some content left within the internal buffer if `output->size` is - * too small. It returns the number of bytes left in the internal buffer and - * must be called until it returns 0. - * - * ZSTD_endStream() instructs to finish a frame. It will perform a flush and - * write frame epilogue. The epilogue is required for decoders to consider a - * frame completed. Similar to ZSTD_flushStream(), it may not be able to flush - * the full content if `output->size` is too small. In which case, call again - * ZSTD_endStream() to complete the flush. It returns the number of bytes left - * in the internal buffer and must be called until it returns 0. - ******************************************************************************/ +typedef ZSTD_CStream zstd_cstream; /** - * ZSTD_CStreamWorkspaceBound() - memory needed to initialize a ZSTD_CStream - * @cParams: The compression parameters to be used for compression. + * zstd_cstream_workspace_bound() - memory needed to initialize a zstd_cstream + * @cparams: The compression parameters to be used for compression. * * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initCStream() and ZSTD_initCStream_usingCDict(). - */ -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams); - -/** - * struct ZSTD_CStream - the zstd streaming compression context - */ -typedef struct ZSTD_CStream_s ZSTD_CStream; - -/*===== ZSTD_CStream management functions =====*/ -/** - * ZSTD_initCStream() - initialize a zstd streaming compression context - * @params: The zstd compression parameters. - * @pledgedSrcSize: If params.fParams.contentSizeFlag == 1 then the caller must - * pass the source size (zero means empty source). Otherwise, - * the caller may optionally pass the source size, or zero if - * unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_CStreamWorkspaceBound(params.cParams) to determine - * how large the workspace must be. - * - * Return: The zstd streaming compression context. + * zstd_init_cstream(). */ -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams); /** - * ZSTD_initCStream_usingCDict() - initialize a streaming compression context - * @cdict: The digested dictionary to use for compression. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. Call ZSTD_CStreamWorkspaceBound() - * with the cParams used to initialize the cdict to determine - * how large the workspace must be. + * zstd_init_cstream() - initialize a zstd streaming compression context + * @parameters The zstd parameters to use for compression. + * @pledged_src_size: If params.fParams.contentSizeFlag == 1 then the caller + * must pass the source size (zero means empty source). + * Otherwise, the caller may optionally pass the source + * size, or zero if unknown. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspace_size: The size of workspace. + * Use zstd_cstream_workspace_bound(params->cparams) to + * determine how large the workspace must be. * - * Return: The zstd streaming compression context. + * Return: The zstd streaming compression context or NULL on error. */ -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize, void *workspace, - size_t workspaceSize); +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size); -/*===== Streaming compression functions =====*/ /** - * ZSTD_resetCStream() - reset the context using parameters from creation - * @zcs: The zstd streaming compression context to reset. - * @pledgedSrcSize: Optionally the source size, or zero if unknown. + * zstd_reset_cstream() - reset the context using parameters from creation + * @cstream: The zstd streaming compression context to reset. + * @pledged_src_size: Optionally the source size, or zero if unknown. * * Resets the context using the parameters from creation. Skips dictionary - * loading, since it can be reused. If `pledgedSrcSize` is non-zero the frame + * loading, since it can be reused. If `pledged_src_size` is non-zero the frame * content size is always written into the frame header. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize); +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size); + /** - * ZSTD_compressStream() - streaming compress some of input into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * @input: Source buffer. `input->pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input->pos < input->size`, and it's up to the caller to present - * remaining data again. + * zstd_compress_stream() - streaming compress some of input into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. + * @input: Source buffer. `input->pos` is updated to indicate how much data + * was read. Note that it may not consume the entire input, in which + * case `input->pos < input->size`, and it's up to the caller to + * present remaining data again. * * The `input` and `output` buffers may be any size. Guaranteed to make some * forward progress if `input` and `output` are not empty. * - * Return: A hint for the number of bytes to use as the input for the next - * function call or an error, which can be checked using - * ZSTD_isError(). + * Return: A hint for the number of bytes to use as the input for the next + * function call or an error, which can be checked using + * zstd_is_error(). */ -size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input); + /** - * ZSTD_flushStream() - flush internal buffers into output - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. + * zstd_flush_stream() - flush internal buffers into output + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * ZSTD_flushStream() must be called until it returns 0, meaning all the data - * has been flushed. Since ZSTD_flushStream() causes a block to be ended, + * zstd_flush_stream() must be called until it returns 0, meaning all the data + * has been flushed. Since zstd_flush_stream() causes a block to be ended, * calling it too often will degrade the compression ratio. * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). */ -size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); -/** - * ZSTD_endStream() - flush internal buffers into output and end the frame - * @zcs: The zstd streaming compression context. - * @output: Destination buffer. `output->pos` is updated to indicate how much - * compressed data was written. - * - * ZSTD_endStream() must be called until it returns 0, meaning all the data has - * been flushed and the frame epilogue has been written. - * - * Return: The number of bytes still present within internal buffers or an - * error, which can be checked using ZSTD_isError(). - */ -size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output); +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output); /** - * ZSTD_CStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_CStreamInSize(void); -/** - * ZSTD_CStreamOutSize() - recommended size for the output buffer + * zstd_end_stream() - flush internal buffers into output and end the frame + * @cstream: The zstd streaming compression context. + * @output: Destination buffer. `output->pos` is updated to indicate how much + * compressed data was written. * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete compressed block. + * zstd_end_stream() must be called until it returns 0, meaning all the data has + * been flushed and the frame epilogue has been written. * - * Return: The recommended size for the output buffer. + * Return: The number of bytes still present within internal buffers or an + * error, which can be checked using zstd_is_error(). */ -size_t ZSTD_CStreamOutSize(void); +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output); +/* ====== Streaming Decompression ====== */ - -/*-***************************************************************************** - * Streaming decompression - HowTo - * - * A ZSTD_DStream object is required to track streaming operations. - * Use ZSTD_initDStream() to initialize a ZSTD_DStream object. - * ZSTD_DStream objects can be re-used multiple times. - * - * Use ZSTD_decompressStream() repetitively to consume your input. - * The function will update both `pos` fields. - * If `input->pos < input->size`, some input has not been consumed. - * It's up to the caller to present again remaining data. - * If `output->pos < output->size`, decoder has flushed everything it could. - * Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise it returns a suggested next input size that will never load more - * than the current frame. - ******************************************************************************/ +typedef ZSTD_DStream zstd_dstream; /** - * ZSTD_DStreamWorkspaceBound() - memory needed to initialize a ZSTD_DStream - * @maxWindowSize: The maximum window size allowed for compressed frames. + * zstd_dstream_workspace_bound() - memory needed to initialize a zstd_dstream + * @max_window_size: The maximum window size allowed for compressed frames. * - * Return: A lower bound on the size of the workspace that is passed to - * ZSTD_initDStream() and ZSTD_initDStream_usingDDict(). + * Return: A lower bound on the size of the workspace that is passed + * to zstd_init_dstream(). */ -size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize); +size_t zstd_dstream_workspace_bound(size_t max_window_size); /** - * struct ZSTD_DStream - the zstd streaming decompression context - */ -typedef struct ZSTD_DStream_s ZSTD_DStream; -/*===== ZSTD_DStream management functions =====*/ -/** - * ZSTD_initDStream() - initialize a zstd streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. - * - * Return: The zstd streaming decompression context. - */ -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, - size_t workspaceSize); -/** - * ZSTD_initDStream_usingDDict() - initialize streaming decompression context - * @maxWindowSize: The maximum window size allowed for compressed frames. - * @ddict: The digested dictionary to use for decompression. - * @workspace: The workspace to emplace the context into. It must outlive - * the returned context. - * @workspaceSize: The size of workspace. - * Use ZSTD_DStreamWorkspaceBound(maxWindowSize) to determine - * how large the workspace must be. + * zstd_init_dstream() - initialize a zstd streaming decompression context + * @max_window_size: The maximum window size allowed for compressed frames. + * @workspace: The workspace to emplace the context into. It must outlive + * the returned context. + * @workspaceSize: The size of workspace. + * Use zstd_dstream_workspace_bound(max_window_size) to + * determine how large the workspace must be. * - * Return: The zstd streaming decompression context. + * Return: The zstd streaming decompression context. */ -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, - const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize); +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size); -/*===== Streaming decompression functions =====*/ /** - * ZSTD_resetDStream() - reset the context using parameters from creation - * @zds: The zstd streaming decompression context to reset. + * zstd_reset_dstream() - reset the context using parameters from creation + * @dstream: The zstd streaming decompression context to reset. * * Resets the context using the parameters from creation. Skips dictionary * loading, since it can be reused. * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: Zero or an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_resetDStream(ZSTD_DStream *zds); +size_t zstd_reset_dstream(zstd_dstream *dstream); + /** - * ZSTD_decompressStream() - streaming decompress some of input into output - * @zds: The zstd streaming decompression context. - * @output: Destination buffer. `output.pos` is updated to indicate how much - * decompressed data was written. - * @input: Source buffer. `input.pos` is updated to indicate how much data was - * read. Note that it may not consume the entire input, in which case - * `input.pos < input.size`, and it's up to the caller to present - * remaining data again. + * zstd_decompress_stream() - streaming decompress some of input into output + * @dstream: The zstd streaming decompression context. + * @output: Destination buffer. `output.pos` is updated to indicate how much + * decompressed data was written. + * @input: Source buffer. `input.pos` is updated to indicate how much data was + * read. Note that it may not consume the entire input, in which case + * `input.pos < input.size`, and it's up to the caller to present + * remaining data again. * * The `input` and `output` buffers may be any size. Guaranteed to make some * forward progress if `input` and `output` are not empty. - * ZSTD_decompressStream() will not consume the last byte of the frame until + * zstd_decompress_stream() will not consume the last byte of the frame until * the entire frame is flushed. * - * Return: Returns 0 iff a frame is completely decoded and fully flushed. - * Otherwise returns a hint for the number of bytes to use as the input - * for the next function call or an error, which can be checked using - * ZSTD_isError(). The size hint will never load more than the frame. + * Return: Returns 0 iff a frame is completely decoded and fully flushed. + * Otherwise returns a hint for the number of bytes to use as the + * input for the next function call or an error, which can be checked + * using zstd_is_error(). The size hint will never load more than the + * frame. */ -size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, - ZSTD_inBuffer *input); +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input); -/** - * ZSTD_DStreamInSize() - recommended size for the input buffer - * - * Return: The recommended size for the input buffer. - */ -size_t ZSTD_DStreamInSize(void); -/** - * ZSTD_DStreamOutSize() - recommended size for the output buffer - * - * When the output buffer is at least this large, it is guaranteed to be large - * enough to flush at least one complete decompressed block. - * - * Return: The recommended size for the output buffer. - */ -size_t ZSTD_DStreamOutSize(void); - - -/* --- Constants ---*/ -#define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */ -#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U - -#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) -#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) - -#define ZSTD_WINDOWLOG_MAX_32 27 -#define ZSTD_WINDOWLOG_MAX_64 27 -#define ZSTD_WINDOWLOG_MAX \ - ((unsigned int)(sizeof(size_t) == 4 \ - ? ZSTD_WINDOWLOG_MAX_32 \ - : ZSTD_WINDOWLOG_MAX_64)) -#define ZSTD_WINDOWLOG_MIN 10 -#define ZSTD_HASHLOG_MAX ZSTD_WINDOWLOG_MAX -#define ZSTD_HASHLOG_MIN 6 -#define ZSTD_CHAINLOG_MAX (ZSTD_WINDOWLOG_MAX+1) -#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN -#define ZSTD_HASHLOG3_MAX 17 -#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) -#define ZSTD_SEARCHLOG_MIN 1 -/* only for ZSTD_fast, other strategies are limited to 6 */ -#define ZSTD_SEARCHLENGTH_MAX 7 -/* only for ZSTD_btopt, other strategies are limited to 4 */ -#define ZSTD_SEARCHLENGTH_MIN 3 -#define ZSTD_TARGETLENGTH_MIN 4 -#define ZSTD_TARGETLENGTH_MAX 999 - -/* for static allocation */ -#define ZSTD_FRAMEHEADERSIZE_MAX 18 -#define ZSTD_FRAMEHEADERSIZE_MIN 6 -static const size_t ZSTD_frameHeaderSize_prefix = 5; -static const size_t ZSTD_frameHeaderSize_min = ZSTD_FRAMEHEADERSIZE_MIN; -static const size_t ZSTD_frameHeaderSize_max = ZSTD_FRAMEHEADERSIZE_MAX; -/* magic number + skippable frame length */ -static const size_t ZSTD_skippableHeaderSize = 8; - - -/*-************************************* - * Compressed size functions - **************************************/ - -/** - * ZSTD_findFrameCompressedSize() - returns the size of a compressed frame - * @src: Source buffer. It should point to the start of a zstd encoded frame - * or a skippable frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * size of the frame. - * - * Return: The compressed size of the frame pointed to by `src` or an error, - * which can be check with ZSTD_isError(). - * Suitable to pass to ZSTD_decompress() or similar functions. - */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Decompressed size functions - **************************************/ -/** - * ZSTD_getFrameContentSize() - returns the content size in a zstd frame header - * @src: It should point to the start of a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The frame content size stored in the frame header if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the content size isn't stored in the - * frame header. `ZSTD_CONTENTSIZE_ERROR` on invalid input. - */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); +/* ====== Frame Inspection Functions ====== */ /** - * ZSTD_findDecompressedSize() - returns decompressed size of a series of frames - * @src: It should point to the start of a series of zstd encoded and/or - * skippable frames. - * @srcSize: The exact size of the series of frames. + * zstd_find_frame_compressed_size() - returns the size of a compressed frame + * @src: Source buffer. It should point to the start of a zstd encoded + * frame or a skippable frame. + * @src_size: The size of the source buffer. It must be at least as large as the + * size of the frame. * - * If any zstd encoded frame in the series doesn't have the frame content size - * set, `ZSTD_CONTENTSIZE_UNKNOWN` is returned. But frame content size is always - * set when using ZSTD_compress(). The decompressed size can be very large. - * If the source is untrusted, the decompressed size could be wrong or - * intentionally modified. Always ensure the result fits within the - * application's authorized limits. ZSTD_findDecompressedSize() handles multiple - * frames, and so it must traverse the input to read each frame header. This is - * efficient as most of the data is skipped, however it does mean that all frame - * data must be present and valid. - * - * Return: Decompressed size of all the data contained in the frames if known. - * `ZSTD_CONTENTSIZE_UNKNOWN` if the decompressed size is unknown. - * `ZSTD_CONTENTSIZE_ERROR` if an error occurred. - */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize); - -/*-************************************* - * Advanced compression functions - **************************************/ -/** - * ZSTD_checkCParams() - ensure parameter values remain within authorized range - * @cParams: The zstd compression parameters. - * - * Return: Zero or an error, which can be checked using ZSTD_isError(). + * Return: The compressed size of the frame pointed to by `src` or an error, + * which can be check with zstd_is_error(). + * Suitable to pass to ZSTD_decompress() or similar functions. */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams); +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size); /** - * ZSTD_adjustCParams() - optimize parameters for a given srcSize and dictSize - * @srcSize: Optionally the estimated source size, or zero if unknown. - * @dictSize: Optionally the estimated dictionary size, or zero if unknown. - * - * Return: The optimized parameters. - */ -ZSTD_compressionParameters ZSTD_adjustCParams( - ZSTD_compressionParameters cParams, unsigned long long srcSize, - size_t dictSize); - -/*--- Advanced decompression functions ---*/ - -/** - * ZSTD_isFrame() - returns true iff the buffer starts with a valid frame - * @buffer: The source buffer to check. - * @size: The size of the source buffer, must be at least 4 bytes. - * - * Return: True iff the buffer starts with a zstd or skippable frame identifier. - */ -unsigned int ZSTD_isFrame(const void *buffer, size_t size); - -/** - * ZSTD_getDictID_fromDict() - returns the dictionary id stored in a dictionary - * @dict: The dictionary buffer. - * @dictSize: The size of the dictionary buffer. - * - * Return: The dictionary id stored within the dictionary or 0 if the - * dictionary is not a zstd dictionary. If it returns 0 the - * dictionary can still be loaded as a content-only dictionary. - */ -unsigned int ZSTD_getDictID_fromDict(const void *dict, size_t dictSize); - -/** - * ZSTD_getDictID_fromDDict() - returns the dictionary id stored in a ZSTD_DDict - * @ddict: The ddict to find the id of. - * - * Return: The dictionary id stored within `ddict` or 0 if the dictionary is not - * a zstd dictionary. If it returns 0 `ddict` will be loaded as a - * content-only dictionary. - */ -unsigned int ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict); - -/** - * ZSTD_getDictID_fromFrame() - returns the dictionary id stored in a zstd frame - * @src: Source buffer. It must be a zstd encoded frame. - * @srcSize: The size of the source buffer. It must be at least as large as the - * frame header. `ZSTD_frameHeaderSize_max` is always large enough. - * - * Return: The dictionary id required to decompress the frame stored within - * `src` or 0 if the dictionary id could not be decoded. It can return - * 0 if the frame does not require a dictionary, the dictionary id - * wasn't stored in the frame, `src` is not a zstd frame, or `srcSize` - * is too small. - */ -unsigned int ZSTD_getDictID_fromFrame(const void *src, size_t srcSize); - -/** - * struct ZSTD_frameParams - zstd frame parameters stored in the frame header - * @frameContentSize: The frame content size, or 0 if not present. + * struct zstd_frame_params - zstd frame parameters stored in the frame header + * @frameContentSize: The frame content size, or ZSTD_CONTENTSIZE_UNKNOWN if not + * present. * @windowSize: The window size, or 0 if the frame is a skippable frame. + * @blockSizeMax: The maximum block size. + * @frameType: The frame type (zstd or skippable) + * @headerSize: The size of the frame header. * @dictID: The dictionary id, or 0 if not present. * @checksumFlag: Whether a checksum was used. + * + * See zstd_lib.h. */ -typedef struct { - unsigned long long frameContentSize; - unsigned int windowSize; - unsigned int dictID; - unsigned int checksumFlag; -} ZSTD_frameParams; +typedef ZSTD_frameHeader zstd_frame_header; /** - * ZSTD_getFrameParams() - extracts parameters from a zstd or skippable frame - * @fparamsPtr: On success the frame parameters are written here. - * @src: The source buffer. It must point to a zstd or skippable frame. - * @srcSize: The size of the source buffer. `ZSTD_frameHeaderSize_max` is - * always large enough to succeed. + * zstd_get_frame_header() - extracts parameters from a zstd or skippable frame + * @params: On success the frame parameters are written here. + * @src: The source buffer. It must point to a zstd or skippable frame. + * @src_size: The size of the source buffer. * - * Return: 0 on success. If more data is required it returns how many bytes - * must be provided to make forward progress. Otherwise it returns - * an error, which can be checked using ZSTD_isError(). + * Return: 0 on success. If more data is required it returns how many bytes + * must be provided to make forward progress. Otherwise it returns + * an error, which can be checked using zstd_is_error(). */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, - size_t srcSize); - -/*-***************************************************************************** - * Buffer-less and synchronous inner streaming functions - * - * This is an advanced API, giving full control over buffer management, for - * users which need direct control over memory. - * But it's also a complex one, with many restrictions (documented below). - * Prefer using normal streaming API for an easier experience - ******************************************************************************/ - -/*-***************************************************************************** - * Buffer-less streaming compression (synchronous mode) - * - * A ZSTD_CCtx object is required to track streaming operations. - * Use ZSTD_initCCtx() to initialize a context. - * ZSTD_CCtx object can be re-used multiple times within successive compression - * operations. - * - * Start by initializing a context. - * Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary - * compression, - * or ZSTD_compressBegin_advanced(), for finer parameter control. - * It's also possible to duplicate a reference context which has already been - * initialized, using ZSTD_copyCCtx() - * - * Then, consume your input using ZSTD_compressContinue(). - * There are some important considerations to keep in mind when using this - * advanced function : - * - ZSTD_compressContinue() has no internal buffer. It uses externally provided - * buffer only. - * - Interface is synchronous : input is consumed entirely and produce 1+ - * (or more) compressed blocks. - * - Caller must ensure there is enough space in `dst` to store compressed data - * under worst case scenario. Worst case evaluation is provided by - * ZSTD_compressBound(). - * ZSTD_compressContinue() doesn't guarantee recover after a failed - * compression. - * - ZSTD_compressContinue() presumes prior input ***is still accessible and - * unmodified*** (up to maximum distance size, see WindowLog). - * It remembers all previous contiguous blocks, plus one separated memory - * segment (which can itself consists of multiple contiguous blocks) - * - ZSTD_compressContinue() detects that prior input has been overwritten when - * `src` buffer overlaps. In which case, it will "discard" the relevant memory - * section from its history. - * - * Finish a frame with ZSTD_compressEnd(), which will write the last block(s) - * and optional checksum. It's possible to use srcSize==0, in which case, it - * will write a final empty block to end the frame. Without last block mark, - * frames will be considered unfinished (corrupted) by decoders. - * - * `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress some new - * frame. - ******************************************************************************/ - -/*===== Buffer-less streaming compression functions =====*/ -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel); -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, int compressionLevel); -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, - size_t dictSize, ZSTD_parameters params, - unsigned long long pledgedSrcSize); -size_t ZSTD_copyCCtx(ZSTD_CCtx *cctx, const ZSTD_CCtx *preparedCCtx, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, - unsigned long long pledgedSrcSize); -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); - - - -/*-***************************************************************************** - * Buffer-less streaming decompression (synchronous mode) - * - * A ZSTD_DCtx object is required to track streaming operations. - * Use ZSTD_initDCtx() to initialize a context. - * A ZSTD_DCtx object can be re-used multiple times. - * - * First typical operation is to retrieve frame parameters, using - * ZSTD_getFrameParams(). It fills a ZSTD_frameParams structure which provide - * important information to correctly decode the frame, such as the minimum - * rolling buffer size to allocate to decompress data (`windowSize`), and the - * dictionary ID used. - * Note: content size is optional, it may not be present. 0 means unknown. - * Note that these values could be wrong, either because of data malformation, - * or because an attacker is spoofing deliberate false information. As a - * consequence, check that values remain within valid application range, - * especially `windowSize`, before allocation. Each application can set its own - * limit, depending on local restrictions. For extended interoperability, it is - * recommended to support at least 8 MB. - * Frame parameters are extracted from the beginning of the compressed frame. - * Data fragment must be large enough to ensure successful decoding, typically - * `ZSTD_frameHeaderSize_max` bytes. - * Result: 0: successful decoding, the `ZSTD_frameParams` structure is filled. - * >0: `srcSize` is too small, provide at least this many bytes. - * errorCode, which can be tested using ZSTD_isError(). - * - * Start decompression, with ZSTD_decompressBegin() or - * ZSTD_decompressBegin_usingDict(). Alternatively, you can copy a prepared - * context, using ZSTD_copyDCtx(). - * - * Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() - * alternatively. - * ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' - * to ZSTD_decompressContinue(). - * ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will - * fail. - * - * The result of ZSTD_decompressContinue() is the number of bytes regenerated - * within 'dst' (necessarily <= dstCapacity). It can be zero, which is not an - * error; it just means ZSTD_decompressContinue() has decoded some metadata - * item. It can also be an error code, which can be tested with ZSTD_isError(). - * - * ZSTD_decompressContinue() needs previous data blocks during decompression, up - * to `windowSize`. They should preferably be located contiguously, prior to - * current block. Alternatively, a round buffer of sufficient size is also - * possible. Sufficient size is determined by frame parameters. - * ZSTD_decompressContinue() is very sensitive to contiguity, if 2 blocks don't - * follow each other, make sure that either the compressor breaks contiguity at - * the same place, or that previous contiguous segment is large enough to - * properly handle maximum back-reference. - * - * A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. - * Context can then be reset to start a new decompression. - * - * Note: it's possible to know if next input to present is a header or a block, - * using ZSTD_nextInputType(). This information is not required to properly - * decode a frame. - * - * == Special case: skippable frames == - * - * Skippable frames allow integration of user-defined data into a flow of - * concatenated frames. Skippable frames will be ignored (skipped) by a - * decompressor. The format of skippable frames is as follows: - * a) Skippable frame ID - 4 Bytes, Little endian format, any value from - * 0x184D2A50 to 0x184D2A5F - * b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits - * c) Frame Content - any content (User Data) of length equal to Frame Size - * For skippable frames ZSTD_decompressContinue() always returns 0. - * For skippable frames ZSTD_getFrameParams() returns fparamsPtr->windowLog==0 - * what means that a frame is skippable. - * Note: If fparamsPtr->frameContentSize==0, it is ambiguous: the frame might - * actually be a zstd encoded frame with no content. For purposes of - * decompression, it is valid in both cases to skip the frame using - * ZSTD_findFrameCompressedSize() to find its size in bytes. - * It also returns frame size as fparamsPtr->frameContentSize. - ******************************************************************************/ - -/*===== Buffer-less streaming decompression functions =====*/ -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx); -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, - size_t dictSize); -void ZSTD_copyDCtx(ZSTD_DCtx *dctx, const ZSTD_DCtx *preparedDCtx); -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx); -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -typedef enum { - ZSTDnit_frameHeader, - ZSTDnit_blockHeader, - ZSTDnit_block, - ZSTDnit_lastBlock, - ZSTDnit_checksum, - ZSTDnit_skippableFrame -} ZSTD_nextInputType_e; -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx); - -/*-***************************************************************************** - * Block functions - * - * Block functions produce and decode raw zstd blocks, without frame metadata. - * Frame metadata cost is typically ~18 bytes, which can be non-negligible for - * very small blocks (< 100 bytes). User will have to take in charge required - * information to regenerate data, such as compressed and content sizes. - * - * A few rules to respect: - * - Compressing and decompressing require a context structure - * + Use ZSTD_initCCtx() and ZSTD_initDCtx() - * - It is necessary to init context before starting - * + compression : ZSTD_compressBegin() - * + decompression : ZSTD_decompressBegin() - * + variants _usingDict() are also allowed - * + copyCCtx() and copyDCtx() work too - * - Block size is limited, it must be <= ZSTD_getBlockSizeMax() - * + If you need to compress more, cut data into multiple blocks - * + Consider using the regular ZSTD_compress() instead, as frame metadata - * costs become negligible when source size is large. - * - When a block is considered not compressible enough, ZSTD_compressBlock() - * result will be zero. In which case, nothing is produced into `dst`. - * + User must test for such outcome and deal directly with uncompressed data - * + ZSTD_decompressBlock() doesn't accept uncompressed data as input!!! - * + In case of multiple successive blocks, decoder must be informed of - * uncompressed block existence to follow proper history. Use - * ZSTD_insertBlock() in such a case. - ******************************************************************************/ - -/* Define for static allocation */ -#define ZSTD_BLOCKSIZE_ABSOLUTEMAX (128 * 1024) -/*===== Raw zstd block functions =====*/ -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx); -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, - const void *src, size_t srcSize); -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, - size_t blockSize); +size_t zstd_get_frame_header(zstd_frame_header *params, const void *src, + size_t src_size); -#endif /* ZSTD_H */ +#endif /* LINUX_ZSTD_H */ diff --git a/include/linux/zstd_errors.h b/include/linux/zstd_errors.h new file mode 100644 index 000000000000..da9fea781ca0 --- /dev/null +++ b/include/linux/zstd_errors.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_ERRORS_H_398273423 +#define ZSTD_ERRORS_H_398273423 + + +/*===== dependency =====*/ +#include /* size_t */ + + +/* ===== ZSTDERRORLIB_API : control library symbols visibility ===== */ +#define ZSTDERRORLIB_VISIBILITY +#define ZSTDERRORLIB_API ZSTDERRORLIB_VISIBILITY + +/*-********************************************* + * Error codes list + *-********************************************* + * Error codes _values_ are pinned down since v1.3.1 only. + * Therefore, don't rely on values if you may link to any version < v1.3.1. + * + * Only values < 100 are considered stable. + * + * note 1 : this API shall be used with static linking only. + * dynamic linking is not yet officially supported. + * note 2 : Prefer relying on the enum than on its value whenever possible + * This is the only supported way to use the error list < v1.3.1 + * note 3 : ZSTD_isError() is always correct, whatever the library version. + **********************************************/ +typedef enum { + ZSTD_error_no_error = 0, + ZSTD_error_GENERIC = 1, + ZSTD_error_prefix_unknown = 10, + ZSTD_error_version_unsupported = 12, + ZSTD_error_frameParameter_unsupported = 14, + ZSTD_error_frameParameter_windowTooLarge = 16, + ZSTD_error_corruption_detected = 20, + ZSTD_error_checksum_wrong = 22, + ZSTD_error_dictionary_corrupted = 30, + ZSTD_error_dictionary_wrong = 32, + ZSTD_error_dictionaryCreation_failed = 34, + ZSTD_error_parameter_unsupported = 40, + ZSTD_error_parameter_outOfBound = 42, + ZSTD_error_tableLog_tooLarge = 44, + ZSTD_error_maxSymbolValue_tooLarge = 46, + ZSTD_error_maxSymbolValue_tooSmall = 48, + ZSTD_error_stage_wrong = 60, + ZSTD_error_init_missing = 62, + ZSTD_error_memory_allocation = 64, + ZSTD_error_workSpace_tooSmall= 66, + ZSTD_error_dstSize_tooSmall = 70, + ZSTD_error_srcSize_wrong = 72, + ZSTD_error_dstBuffer_null = 74, + /* following error codes are __NOT STABLE__, they can be removed or changed in future versions */ + ZSTD_error_frameIndex_tooLarge = 100, + ZSTD_error_seekableIO = 102, + ZSTD_error_dstBuffer_wrong = 104, + ZSTD_error_srcBuffer_wrong = 105, + ZSTD_error_maxCode = 120 /* never EVER use this value directly, it can change in future versions! Use ZSTD_isError() instead */ +} ZSTD_ErrorCode; + +/*! ZSTD_getErrorCode() : + convert a `size_t` function result into a `ZSTD_ErrorCode` enum type, + which can be used to compare with enum list published above */ +ZSTDERRORLIB_API ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult); +ZSTDERRORLIB_API const char* ZSTD_getErrorString(ZSTD_ErrorCode code); /**< Same as ZSTD_getErrorName, but using a `ZSTD_ErrorCode` enum argument */ + + + +#endif /* ZSTD_ERRORS_H_398273423 */ diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h new file mode 100644 index 000000000000..2e6d0fb5dc27 --- /dev/null +++ b/include/linux/zstd_lib.h @@ -0,0 +1,2432 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_H_235446 +#define ZSTD_H_235446 + +/* ====== Dependency ======*/ +#include /* INT_MAX */ +#include /* size_t */ + + +/* ===== ZSTDLIB_API : control library symbols visibility ===== */ +#define ZSTDLIB_VISIBILITY +#define ZSTDLIB_API ZSTDLIB_VISIBILITY + + +/******************************************************************************* + Introduction + + zstd, short for Zstandard, is a fast lossless compression algorithm, targeting + real-time compression scenarios at zlib-level and better compression ratios. + The zstd compression library provides in-memory compression and decompression + functions. + + The library supports regular compression levels from 1 up to ZSTD_maxCLevel(), + which is currently 22. Levels >= 20, labeled `--ultra`, should be used with + caution, as they require more memory. The library also offers negative + compression levels, which extend the range of speed vs. ratio preferences. + The lower the level, the faster the speed (at the cost of compression). + + Compression can be done in: + - a single step (described as Simple API) + - a single step, reusing a context (described as Explicit context) + - unbounded multiple steps (described as Streaming compression) + + The compression ratio achievable on small data can be highly improved using + a dictionary. Dictionary compression can be performed in: + - a single step (described as Simple dictionary API) + - a single step, reusing a dictionary (described as Bulk-processing + dictionary API) + + Advanced experimental functions can be accessed using + `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h. + + Advanced experimental APIs should never be used with a dynamically-linked + library. They are not "stable"; their definitions or signatures may change in + the future. Only static linking is allowed. +*******************************************************************************/ + +/*------ Version ------*/ +#define ZSTD_VERSION_MAJOR 1 +#define ZSTD_VERSION_MINOR 4 +#define ZSTD_VERSION_RELEASE 10 +#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) + +/*! ZSTD_versionNumber() : + * Return runtime library version, the value is (MAJOR*100*100 + MINOR*100 + RELEASE). */ +ZSTDLIB_API unsigned ZSTD_versionNumber(void); + +#define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE +#define ZSTD_QUOTE(str) #str +#define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str) +#define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) + +/*! ZSTD_versionString() : + * Return runtime library version, like "1.4.5". Requires v1.3.0+. */ +ZSTDLIB_API const char* ZSTD_versionString(void); + +/* ************************************* + * Default constant + ***************************************/ +#ifndef ZSTD_CLEVEL_DEFAULT +# define ZSTD_CLEVEL_DEFAULT 3 +#endif + +/* ************************************* + * Constants + ***************************************/ + +/* All magic numbers are supposed read/written to/from files/memory using little-endian convention */ +#define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ +#define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ +#define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ +#define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 + +#define ZSTD_BLOCKSIZELOG_MAX 17 +#define ZSTD_BLOCKSIZE_MAX (1<= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*! ZSTD_decompress() : + * `compressedSize` : must be the _exact_ size of some number of compressed and/or skippable frames. + * `dstCapacity` is an upper bound of originalSize to regenerate. + * If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. + * @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), + * or an errorCode if it fails (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, + const void* src, size_t compressedSize); + +/*! ZSTD_getFrameContentSize() : requires v1.3.0+ + * `src` should point to the start of a ZSTD encoded frame. + * `srcSize` must be at least as large as the frame header. + * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough. + * @return : - decompressed size of `src` frame content, if known + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) + * note 1 : a 0 return value means the frame is valid but "empty". + * note 2 : decompressed size is an optional field, it may not be present, typically in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * Optionally, application can rely on some implicit limit, + * as ZSTD_decompress() only needs an upper bound of decompressed size. + * (For example, data could be necessarily cut into blocks <= 16 KB). + * note 3 : decompressed size is always present when compression is completed using single-pass functions, + * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict(). + * note 4 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 5 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure return value fits within application's authorized limits. + * Each application can set its own limits. + * note 6 : This function replaces ZSTD_getDecompressedSize() */ +#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1) +#define ZSTD_CONTENTSIZE_ERROR (0ULL - 2) +ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize); + +/*! ZSTD_getDecompressedSize() : + * NOTE: This function is now obsolete, in favor of ZSTD_getFrameContentSize(). + * Both functions work the same way, but ZSTD_getDecompressedSize() blends + * "empty", "unknown" and "error" results to the same return value (0), + * while ZSTD_getFrameContentSize() gives them separate return values. + * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ +ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_findFrameCompressedSize() : + * `src` should point to the start of a ZSTD frame or skippable frame. + * `srcSize` must be >= first frame size + * @return : the compressed size of the first frame starting at `src`, + * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, + * or an error code if input is invalid */ +ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); + + +/*====== Helper functions ======*/ +#define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ +ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ +ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ +ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ +ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ +ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ + + +/*************************************** +* Explicit context +***************************************/ +/*= Compression context + * When compressing many times, + * it is recommended to allocate a context just once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Note : re-using context is just a speed / resource optimization. + * It doesn't change the compression ratio, which remains identical. + * Note 2 : In multi-threaded environments, + * use one different context per thread for parallel execution. + */ +typedef struct ZSTD_CCtx_s ZSTD_CCtx; +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); +ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); /* accept NULL pointer */ + +/*! ZSTD_compressCCtx() : + * Same as ZSTD_compress(), using an explicit ZSTD_CCtx. + * Important : in order to behave similarly to `ZSTD_compress()`, + * this function compresses at requested compression level, + * __ignoring any other parameter__ . + * If any advanced parameter was set using the advanced API, + * they will all be reset. Only `compressionLevel` remains. + */ +ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel); + +/*= Decompression context + * When decompressing many times, + * it is recommended to allocate a context only once, + * and re-use it for each successive compression operation. + * This will make workload friendlier for system's memory. + * Use one context per thread for parallel execution. */ +typedef struct ZSTD_DCtx_s ZSTD_DCtx; +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(void); +ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); /* accept NULL pointer */ + +/*! ZSTD_decompressDCtx() : + * Same as ZSTD_decompress(), + * requires an allocated ZSTD_DCtx. + * Compatible with sticky parameters. + */ +ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced compression API +***************************************/ + +/* API design : + * Parameters are pushed one by one into an existing context, + * using ZSTD_CCtx_set*() functions. + * Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame. + * "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` ! + * __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ . + * + * It's possible to reset all parameters to "default" using ZSTD_CCtx_reset(). + * + * This API supercedes all other "advanced" API entry points in the experimental section. + * In the future, we expect to remove from experimental API entry points which are redundant with this API. + */ + + +/* Compression strategies, listed from fastest to strongest */ +typedef enum { ZSTD_fast=1, + ZSTD_dfast=2, + ZSTD_greedy=3, + ZSTD_lazy=4, + ZSTD_lazy2=5, + ZSTD_btlazy2=6, + ZSTD_btopt=7, + ZSTD_btultra=8, + ZSTD_btultra2=9 + /* note : new strategies _might_ be added in the future. + Only the order (from fast to strong) is guaranteed */ +} ZSTD_strategy; + + +typedef enum { + + /* compression parameters + * Note: When compressing with a ZSTD_CDict these parameters are superseded + * by the parameters used to construct the ZSTD_CDict. + * See ZSTD_CCtx_refCDict() for more info (superseded-by-cdict). */ + ZSTD_c_compressionLevel=100, /* Set compression parameters according to pre-defined cLevel table. + * Note that exact compression parameters are dynamically determined, + * depending on both compression level and srcSize (when known). + * Default level is ZSTD_CLEVEL_DEFAULT==3. + * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. + * Note 1 : it's possible to pass a negative compression level. + * Note 2 : setting a level does not automatically set all other compression parameters + * to default. Setting this will however eventually dynamically impact the compression + * parameters which have not been manually set. The manually set + * ones will 'stick'. */ + /* Advanced compression parameters : + * It's possible to pin down compression parameters to some specific values. + * In which case, these values are no longer dynamically selected by the compressor */ + ZSTD_c_windowLog=101, /* Maximum allowed back-reference distance, expressed as power of 2. + * This will set a memory budget for streaming decompression, + * with larger values requiring more memory + * and typically compressing more. + * Must be clamped between ZSTD_WINDOWLOG_MIN and ZSTD_WINDOWLOG_MAX. + * Special: value 0 means "use default windowLog". + * Note: Using a windowLog greater than ZSTD_WINDOWLOG_LIMIT_DEFAULT + * requires explicitly allowing such size at streaming decompression stage. */ + ZSTD_c_hashLog=102, /* Size of the initial probe table, as a power of 2. + * Resulting memory usage is (1 << (hashLog+2)). + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX. + * Larger tables improve compression ratio of strategies <= dFast, + * and improve speed of strategies > dFast. + * Special: value 0 means "use default hashLog". */ + ZSTD_c_chainLog=103, /* Size of the multi-probe search table, as a power of 2. + * Resulting memory usage is (1 << (chainLog+2)). + * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX. + * Larger tables result in better and slower compression. + * This parameter is useless for "fast" strategy. + * It's still useful when using "dfast" strategy, + * in which case it defines a secondary probe table. + * Special: value 0 means "use default chainLog". */ + ZSTD_c_searchLog=104, /* Number of search attempts, as a power of 2. + * More attempts result in better and slower compression. + * This parameter is useless for "fast" and "dFast" strategies. + * Special: value 0 means "use default searchLog". */ + ZSTD_c_minMatch=105, /* Minimum size of searched matches. + * Note that Zstandard can still find matches of smaller size, + * it just tweaks its search algorithm to look for this size and larger. + * Larger values increase compression and decompression speed, but decrease ratio. + * Must be clamped between ZSTD_MINMATCH_MIN and ZSTD_MINMATCH_MAX. + * Note that currently, for all strategies < btopt, effective minimum is 4. + * , for all strategies > fast, effective maximum is 6. + * Special: value 0 means "use default minMatchLength". */ + ZSTD_c_targetLength=106, /* Impact of this field depends on strategy. + * For strategies btopt, btultra & btultra2: + * Length of Match considered "good enough" to stop search. + * Larger values make compression stronger, and slower. + * For strategy fast: + * Distance between match sampling. + * Larger values make compression faster, and weaker. + * Special: value 0 means "use default targetLength". */ + ZSTD_c_strategy=107, /* See ZSTD_strategy enum definition. + * The higher the value of selected strategy, the more complex it is, + * resulting in stronger and slower compression. + * Special: value 0 means "use default strategy". */ + + /* LDM mode parameters */ + ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching. + * This parameter is designed to improve compression ratio + * for large inputs, by finding large matches at long distance. + * It increases memory usage and window size. + * Note: enabling this parameter increases default ZSTD_c_windowLog to 128 MB + * except when expressly set to a different value. + * Note: will be enabled by default if ZSTD_c_windowLog >= 128 MB and + * compression strategy >= ZSTD_btopt (== compression level 16+) */ + ZSTD_c_ldmHashLog=161, /* Size of the table for long distance matching, as a power of 2. + * Larger values increase memory usage and compression ratio, + * but decrease compression speed. + * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX + * default: windowlog - 7. + * Special: value 0 means "automatically determine hashlog". */ + ZSTD_c_ldmMinMatch=162, /* Minimum match size for long distance matcher. + * Larger/too small values usually decrease compression ratio. + * Must be clamped between ZSTD_LDM_MINMATCH_MIN and ZSTD_LDM_MINMATCH_MAX. + * Special: value 0 means "use default value" (default: 64). */ + ZSTD_c_ldmBucketSizeLog=163, /* Log size of each bucket in the LDM hash table for collision resolution. + * Larger values improve collision resolution but decrease compression speed. + * The maximum value is ZSTD_LDM_BUCKETSIZELOG_MAX. + * Special: value 0 means "use default value" (default: 3). */ + ZSTD_c_ldmHashRateLog=164, /* Frequency of inserting/looking up entries into the LDM hash table. + * Must be clamped between 0 and (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN). + * Default is MAX(0, (windowLog - ldmHashLog)), optimizing hash table usage. + * Larger values improve compression speed. + * Deviating far from default value will likely result in a compression ratio decrease. + * Special: value 0 means "automatically determine hashRateLog". */ + + /* frame parameters */ + ZSTD_c_contentSizeFlag=200, /* Content size will be written into frame header _whenever known_ (default:1) + * Content size must be known at the beginning of compression. + * This is automatically the case when using ZSTD_compress2(), + * For streaming scenarios, content size must be provided with ZSTD_CCtx_setPledgedSrcSize() */ + ZSTD_c_checksumFlag=201, /* A 32-bits checksum of content is written at end of frame (default:0) */ + ZSTD_c_dictIDFlag=202, /* When applicable, dictionary's ID is written into frame header (default:1) */ + + /* multi-threading parameters */ + /* These parameters are only active if multi-threading is enabled (compiled with build macro ZSTD_MULTITHREAD). + * Otherwise, trying to set any other value than default (0) will be a no-op and return an error. + * In a situation where it's unknown if the linked library supports multi-threading or not, + * setting ZSTD_c_nbWorkers to any value >= 1 and consulting the return value provides a quick way to check this property. + */ + ZSTD_c_nbWorkers=400, /* Select how many threads will be spawned to compress in parallel. + * When nbWorkers >= 1, triggers asynchronous mode when invoking ZSTD_compressStream*() : + * ZSTD_compressStream*() consumes input and flush output if possible, but immediately gives back control to caller, + * while compression is performed in parallel, within worker thread(s). + * (note : a strong exception to this rule is when first invocation of ZSTD_compressStream2() sets ZSTD_e_end : + * in which case, ZSTD_compressStream2() delegates to ZSTD_compress2(), which is always a blocking call). + * More workers improve speed, but also increase memory usage. + * Default value is `0`, aka "single-threaded mode" : no worker is spawned, + * compression is performed inside Caller's thread, and all invocations are blocking */ + ZSTD_c_jobSize=401, /* Size of a compression job. This value is enforced only when nbWorkers >= 1. + * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads. + * 0 means default, which is dynamically determined based on compression parameters. + * Job size must be a minimum of overlap size, or 1 MB, whichever is largest. + * The minimum size is automatically and transparently enforced. */ + ZSTD_c_overlapLog=402, /* Control the overlap size, as a fraction of window size. + * The overlap size is an amount of data reloaded from previous job at the beginning of a new job. + * It helps preserve compression ratio, while each job is compressed in parallel. + * This value is enforced only when nbWorkers >= 1. + * Larger values increase compression ratio, but decrease speed. + * Possible values range from 0 to 9 : + * - 0 means "default" : value will be determined by the library, depending on strategy + * - 1 means "no overlap" + * - 9 means "full overlap", using a full window size. + * Each intermediate rank increases/decreases load size by a factor 2 : + * 9: full window; 8: w/2; 7: w/4; 6: w/8; 5:w/16; 4: w/32; 3:w/64; 2:w/128; 1:no overlap; 0:default + * default value varies between 6 and 9, depending on strategy */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_c_rsyncable + * ZSTD_c_format + * ZSTD_c_forceMaxWindow + * ZSTD_c_forceAttachDict + * ZSTD_c_literalCompressionMode + * ZSTD_c_targetCBlockSize + * ZSTD_c_srcSizeHint + * ZSTD_c_enableDedicatedDictSearch + * ZSTD_c_stableInBuffer + * ZSTD_c_stableOutBuffer + * ZSTD_c_blockDelimiters + * ZSTD_c_validateSequences + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly; + * also, the enums values themselves are unstable and can still change. + */ + ZSTD_c_experimentalParam1=500, + ZSTD_c_experimentalParam2=10, + ZSTD_c_experimentalParam3=1000, + ZSTD_c_experimentalParam4=1001, + ZSTD_c_experimentalParam5=1002, + ZSTD_c_experimentalParam6=1003, + ZSTD_c_experimentalParam7=1004, + ZSTD_c_experimentalParam8=1005, + ZSTD_c_experimentalParam9=1006, + ZSTD_c_experimentalParam10=1007, + ZSTD_c_experimentalParam11=1008, + ZSTD_c_experimentalParam12=1009 +} ZSTD_cParameter; + +typedef struct { + size_t error; + int lowerBound; + int upperBound; +} ZSTD_bounds; + +/*! ZSTD_cParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - lower and upper bounds, both inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter cParam); + +/*! ZSTD_CCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_cParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_cParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is generally only possible during frame initialization (before starting compression). + * Exception : when using multi-threading mode (nbWorkers >= 1), + * the following parameters can be updated _during_ compression (within same frame): + * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy. + * new parameters will be active for next job only (after a flush()). + * @return : an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtx_setPledgedSrcSize() : + * Total input data size to be compressed as a single frame. + * Value will be written in frame header, unless if explicitly forbidden using ZSTD_c_contentSizeFlag. + * This value will also be controlled at end of frame, and trigger an error if not respected. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : pledgedSrcSize==0 actually means zero, aka an empty frame. + * In order to mean "unknown content size", pass constant ZSTD_CONTENTSIZE_UNKNOWN. + * ZSTD_CONTENTSIZE_UNKNOWN is default value for any new frame. + * Note 2 : pledgedSrcSize is only valid once, for the next frame. + * It's discarded at the end of the frame, and replaced by ZSTD_CONTENTSIZE_UNKNOWN. + * Note 3 : Whenever all input data is provided and consumed in a single round, + * for example with ZSTD_compress2(), + * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), + * this value is automatically overridden by srcSize instead. + */ +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); + +typedef enum { + ZSTD_reset_session_only = 1, + ZSTD_reset_parameters = 2, + ZSTD_reset_session_and_parameters = 3 +} ZSTD_ResetDirective; + +/*! ZSTD_CCtx_reset() : + * There are 2 different things that can be reset, independently or jointly : + * - The session : will stop compressing current frame, and make CCtx ready to start a new one. + * Useful after an error, or to interrupt any ongoing compression. + * Any internal data not yet flushed is cancelled. + * Compression parameters and dictionary remain unchanged. + * They will be used to compress next frame. + * Resetting session never fails. + * - The parameters : changes all parameters back to "default". + * This removes any reference to any dictionary too. + * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) + * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) + * - Both : similar to resetting the session, followed by resetting parameters. + */ +ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); + +/*! ZSTD_compress2() : + * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. + * ZSTD_compress2() always starts a new frame. + * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - The function is always blocking, returns when compression is completed. + * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. + * @return : compressed size written into `dst` (<= `dstCapacity), + * or an error code if it fails (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + + +/*************************************** +* Advanced decompression API +***************************************/ + +/* The advanced API pushes parameters one by one into an existing DCtx context. + * Parameters are sticky, and remain valid for all following frames + * using the same DCtx context. + * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). + * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). + * Therefore, no new decompression function is necessary. + */ + +typedef enum { + + ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which + * the streaming API will refuse to allocate memory buffer + * in order to protect the host from unreasonable memory requirements. + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + * Special: value 0 means "use default maximum windowLog". */ + + /* note : additional experimental parameters are also available + * within the experimental section of the API. + * At the time of this writing, they include : + * ZSTD_d_format + * ZSTD_d_stableOutBuffer + * ZSTD_d_forceIgnoreChecksum + * ZSTD_d_refMultipleDDicts + * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. + * note : never ever use experimentalParam? names directly + */ + ZSTD_d_experimentalParam1=1000, + ZSTD_d_experimentalParam2=1001, + ZSTD_d_experimentalParam3=1002, + ZSTD_d_experimentalParam4=1003 + +} ZSTD_dParameter; + +/*! ZSTD_dParam_getBounds() : + * All parameters must belong to an interval with lower and upper bounds, + * otherwise they will either trigger an error or be automatically clamped. + * @return : a structure, ZSTD_bounds, which contains + * - an error status field, which must be tested using ZSTD_isError() + * - both lower and upper bounds, inclusive + */ +ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); + +/*! ZSTD_DCtx_setParameter() : + * Set one compression parameter, selected by enum ZSTD_dParameter. + * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). + * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). + * Setting a parameter is only possible during frame initialization (before starting decompression). + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); + +/*! ZSTD_DCtx_reset() : + * Return a DCtx to clean state. + * Session and parameters can be reset jointly or separately. + * Parameters can only be reset when no active frame is being decompressed. + * @return : 0, or an error code, which can be tested with ZSTD_isError() + */ +ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); + + +/**************************** +* Streaming +****************************/ + +typedef struct ZSTD_inBuffer_s { + const void* src; /**< start of input buffer */ + size_t size; /**< size of input buffer */ + size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_inBuffer; + +typedef struct ZSTD_outBuffer_s { + void* dst; /**< start of output buffer */ + size_t size; /**< size of output buffer */ + size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ +} ZSTD_outBuffer; + + + +/*-*********************************************************************** +* Streaming compression - HowTo +* +* A ZSTD_CStream object is required to track streaming operation. +* Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. +* ZSTD_CStream objects can be reused multiple times on consecutive compression operations. +* It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. +* +* For parallel execution, use one separate ZSTD_CStream per thread. +* +* note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. +* +* Parameters are sticky : when starting a new compression on the same context, +* it will re-use the same sticky parameters as previous compression session. +* When in doubt, it's recommended to fully initialize the context before usage. +* Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), +* ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to +* set more specific parameters, the pledged source size, or load a dictionary. +* +* Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to +* consume input stream. The function will automatically update both `pos` +* fields within `input` and `output`. +* Note that the function may not consume the entire input, for example, because +* the output buffer is already full, in which case `input.pos < input.size`. +* The caller must check if input has been entirely consumed. +* If not, the caller must make some room to receive more compressed data, +* and then present again remaining input data. +* note: ZSTD_e_continue is guaranteed to make some forward progress when called, +* but doesn't guarantee maximal forward progress. This is especially relevant +* when compressing with multiple threads. The call won't block if it can +* consume some input, but if it can't it will wait for some, but not all, +* output to be flushed. +* @return : provides a minimum amount of data remaining to be flushed from internal buffers +* or an error code, which can be tested using ZSTD_isError(). +* +* At any moment, it's possible to flush whatever data might remain stuck within internal buffer, +* using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. +* Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). +* In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the +* operation. +* note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if internal buffers are entirely flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. +* It will perform a flush and write frame epilogue. +* The epilogue is required for decoders to consider a frame completed. +* flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. +* You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to +* start a new frame. +* note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will +* block until the flush is complete or the output buffer is full. +* @return : 0 if frame fully completed and fully flushed, +* >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), +* or an error code, which can be tested using ZSTD_isError(). +* +* *******************************************************************/ + +typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ + /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ +/*===== ZSTD_CStream management functions =====*/ +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); +ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); /* accept NULL pointer */ + +/*===== Streaming compression functions =====*/ +typedef enum { + ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ + ZSTD_e_flush=1, /* flush any data provided so far, + * it creates (at least) one new block, that can be decoded immediately on reception; + * frame will continue: any future data can still reference previously compressed data, improving compression. + * note : multithreaded compression will block to flush as much output as possible. */ + ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. + * note that frame is only closed after compressed data is fully flushed (return value == 0). + * After that point, any additional data starts a new frame. + * note : each frame is independent (does not reference any content from previous frame). + : note : multithreaded compression will block to flush as much output as possible. */ +} ZSTD_EndDirective; + +/*! ZSTD_compressStream2() : + * Behaves about the same as ZSTD_compressStream, with additional control on end directive. + * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() + * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) + * - output->pos must be <= dstCapacity, input->pos must be <= srcSize + * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. + * - endOp must be a valid directive + * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. + * - When nbWorkers>=1, function is non-blocking : it copies a portion of input, distributes jobs to internal worker threads, flush to output whatever is available, + * and then immediately returns, just indicating that there is some data remaining to be flushed. + * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. + * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. + * - @return provides a minimum amount of data remaining to be flushed from internal buffers + * or an error code, which can be tested using ZSTD_isError(). + * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. + * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. + * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. + * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), + * only ZSTD_e_end or ZSTD_e_flush operations are allowed. + * Before starting a new compression job, or changing compression parameters, + * it is required to fully flush internal buffers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp); + + +/* These buffer sizes are softly recommended. + * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. + * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), + * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. + * + * However, note that these recommendations are from the perspective of a C caller program. + * If the streaming interface is invoked from some other language, + * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, + * a major performance rule is to reduce crossing such interface to an absolute minimum. + * It's not rare that performance ends being spent more into the interface, rather than compression itself. + * In which cases, prefer using large buffers, as large as practical, + * for both input and output, to reduce the nb of roundtrips. + */ +ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ + + +/* ***************************************************************************** + * This following is a legacy streaming API. + * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). + * It is redundant, but remains fully supported. + * Advanced parameters and dictionary compression can only be used through the + * new API. + ******************************************************************************/ + +/*! + * Equivalent to: + * + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + */ +ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); +/*! + * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). + * NOTE: The return value is different. ZSTD_compressStream() returns a hint for + * the next read size (if non-zero and not an error). ZSTD_compressStream2() + * returns the minimum nb of bytes left to flush (if non-zero and not an error). + */ +ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ +ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); +/*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ +ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); + + +/*-*************************************************************************** +* Streaming decompression - HowTo +* +* A ZSTD_DStream object is required to track streaming operations. +* Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. +* ZSTD_DStream objects can be re-used multiple times. +* +* Use ZSTD_initDStream() to start a new decompression operation. +* @return : recommended first input size +* Alternatively, use advanced API to set specific properties. +* +* Use ZSTD_decompressStream() repetitively to consume your input. +* The function will update both `pos` fields. +* If `input.pos < input.size`, some input has not been consumed. +* It's up to the caller to present again remaining data. +* The function tries to flush all data decoded immediately, respecting output buffer size. +* If `output.pos < output.size`, decoder has flushed everything it could. +* But if `output.pos == output.size`, there might be some data left within internal buffers., +* In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. +* Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. +* @return : 0 when a frame is completely decoded and fully flushed, +* or an error code, which can be tested using ZSTD_isError(), +* or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : +* the return value is a suggested next input size (just a hint for better latency) +* that will never request more than the remaining frame size. +* *******************************************************************************/ + +typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ + /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ +/*===== ZSTD_DStream management functions =====*/ +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); +ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); /* accept NULL pointer */ + +/*===== Streaming decompression functions =====*/ + +/* This function is redundant with the advanced API and equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, NULL); + */ +ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); + +ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); + +ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ +ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ + + +/************************** +* Simple dictionary API +***************************/ +/*! ZSTD_compress_usingDict() : + * Compression at an explicit compression level using a Dictionary. + * A dictionary can be any arbitrary data segment (also called a prefix), + * or a buffer with specified information (see dictBuilder/zdict.h). + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + int compressionLevel); + +/*! ZSTD_decompress_usingDict() : + * Decompression using a known Dictionary. + * Dictionary must be identical to the one used during compression. + * Note : This function loads the dictionary, resulting in significant startup delay. + * It's intended for a dictionary used only once. + * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize); + + +/*********************************** + * Bulk processing dictionary API + **********************************/ +typedef struct ZSTD_CDict_s ZSTD_CDict; + +/*! ZSTD_createCDict() : + * When compressing multiple messages or blocks using the same dictionary, + * it's recommended to digest the dictionary only once, since it's a costly operation. + * ZSTD_createCDict() will create a state from digesting a dictionary. + * The resulting state can be used for future compression operations with very limited startup cost. + * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. + * @dictBuffer can be released after ZSTD_CDict creation, because its content is copied within CDict. + * Note 1 : Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate @dictBuffer content. + * Note 2 : A ZSTD_CDict can be created from an empty @dictBuffer, + * in which case the only thing that it transports is the @compressionLevel. + * This can be useful in a pipeline featuring ZSTD_compress_usingCDict() exclusively, + * expecting a ZSTD_CDict parameter with any data, including those without a known dictionary. */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, + int compressionLevel); + +/*! ZSTD_freeCDict() : + * Function frees memory allocated by ZSTD_createCDict(). + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. + * Note : compression level is _decided at dictionary creation time_, + * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict); + + +typedef struct ZSTD_DDict_s ZSTD_DDict; + +/*! ZSTD_createDDict() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_freeDDict() : + * Function frees memory allocated with ZSTD_createDDict() + * If a NULL pointer is passed, no operation is performed. */ +ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); + +/*! ZSTD_decompress_usingDDict() : + * Decompression using a digested Dictionary. + * Recommended when same dictionary is used multiple times. */ +ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict); + + +/******************************** + * Dictionary helper functions + *******************************/ + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompressed the frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary to be decoded (most common case). + * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); + + +/******************************************************************************* + * Advanced dictionary and prefix API + * + * This API allows dictionaries to be used with ZSTD_compress2(), + * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and + * only reset with the context is reset with ZSTD_reset_parameters or + * ZSTD_reset_session_and_parameters. Prefixes are single-use. + ******************************************************************************/ + + +/*! ZSTD_CCtx_loadDictionary() : + * Create an internal CDict from `dict` buffer. + * Decompression will have to use same dictionary. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Loading a NULL (or 0-size) dictionary invalidates previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Dictionary is sticky, it will be used for all future compressed frames. + * To return to "no-dictionary" situation, load a NULL dictionary (or reset parameters). + * Note 2 : Loading a dictionary involves building tables. + * It's also a CPU consuming operation, with non-negligible impact on latency. + * Tables are dependent on compression parameters, and for this reason, + * compression parameters can no longer be changed after loading a dictionary. + * Note 3 :`dict` content will be copied internally. + * Use experimental ZSTD_CCtx_loadDictionary_byReference() to reference content instead. + * In such a case, dictionary buffer must outlive its users. + * Note 4 : Use ZSTD_CCtx_loadDictionary_advanced() + * to precisely select how dictionary content must be interpreted. */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_refCDict() : + * Reference a prepared dictionary, to be used for all next compressed frames. + * Note that compression parameters are enforced from within CDict, + * and supersede any compression parameter previously set within CCtx. + * The parameters ignored are labelled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. + * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. + * The dictionary will remain valid for future compressed frames using same CCtx. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Referencing a NULL CDict means "return to no-dictionary mode". + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */ +ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); + +/*! ZSTD_CCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) for next compressed frame. + * A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end). + * Decompression will need same prefix to properly regenerate data. + * Compressing with a prefix is similar in outcome as performing a diff and compressing it, + * but performs much faster, especially during decompression (compression speed is tunable with compression level). + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary + * Note 1 : Prefix buffer is referenced. It **must** outlive compression. + * Its content must remain unmodified during compression. + * Note 2 : If the intention is to diff some large src data blob with some prior version of itself, + * ensure that the window size is large enough to contain the entire source. + * See ZSTD_c_windowLog. + * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters. + * It's a CPU consuming operation, with non-negligible impact on latency. + * If there is a need to use the same prefix multiple times, consider loadDictionary instead. + * Note 4 : By default, the prefix is interpreted as raw content (ZSTD_dct_rawContent). + * Use experimental ZSTD_CCtx_refPrefix_advanced() to alter dictionary interpretation. */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, + const void* prefix, size_t prefixSize); + +/*! ZSTD_DCtx_loadDictionary() : + * Create an internal DDict from dict buffer, + * to be used to decompress next frames. + * The dictionary remains valid for all future frames, until explicitly invalidated. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Special : Adding a NULL (or 0-size) dictionary invalidates any previous dictionary, + * meaning "return to no-dictionary mode". + * Note 1 : Loading a dictionary involves building tables, + * which has a non-negligible impact on CPU usage and latency. + * It's recommended to "load once, use many times", to amortize the cost + * Note 2 :`dict` content will be copied internally, so `dict` can be released after loading. + * Use ZSTD_DCtx_loadDictionary_byReference() to reference dictionary content instead. + * Note 3 : Use ZSTD_DCtx_loadDictionary_advanced() to take control of + * how dictionary content is loaded and interpreted. + */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_refDDict() : + * Reference a prepared dictionary, to be used to decompress next frames. + * The dictionary remains active for decompression of future frames using same DCtx. + * + * If called with ZSTD_d_refMultipleDDicts enabled, repeated calls of this function + * will store the DDict references in a table, and the DDict used for decompression + * will be determined at decompression time, as per the dict ID in the frame. + * The memory for the table is allocated on the first call to refDDict, and can be + * freed with ZSTD_freeDCtx(). + * + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Currently, only one dictionary can be managed. + * Referencing a new dictionary effectively "discards" any previous one. + * Special: referencing a NULL DDict means "return to no-dictionary mode". + * Note 2 : DDict is just referenced, its lifetime must outlive its usage from DCtx. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +/*! ZSTD_DCtx_refPrefix() : + * Reference a prefix (single-usage dictionary) to decompress next frame. + * This is the reverse operation of ZSTD_CCtx_refPrefix(), + * and must use the same prefix as the one used during compression. + * Prefix is **only used once**. Reference is discarded at end of frame. + * End of frame is reached when ZSTD_decompressStream() returns 0. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary + * Note 2 : Prefix buffer is referenced. It **must** outlive decompression. + * Prefix buffer must remain unmodified up to the end of frame, + * reached when ZSTD_decompressStream() returns 0. + * Note 3 : By default, the prefix is treated as raw content (ZSTD_dct_rawContent). + * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode (Experimental section) + * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost. + * A full dictionary is more costly, as it requires building tables. + */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, + const void* prefix, size_t prefixSize); + +/* === Memory management === */ + +/*! ZSTD_sizeof_*() : + * These functions give the _current_ memory usage of selected object. + * Note that object memory usage can evolve (increase or decrease) over time. */ +ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); +ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); +ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); +ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); + +#endif /* ZSTD_H_235446 */ + + +/* ************************************************************************************** + * ADVANCED AND EXPERIMENTAL FUNCTIONS + **************************************************************************************** + * The definitions in the following section are considered experimental. + * They are provided for advanced scenarios. + * They should never be used with a dynamic library, as prototypes may change in the future. + * Use them only in association with static linking. + * ***************************************************************************************/ + +#if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) +#define ZSTD_H_ZSTD_STATIC_LINKING_ONLY + +/**************************************************************************************** + * experimental API (static linking only) + **************************************************************************************** + * The following symbols and constants + * are not planned to join "stable API" status in the near future. + * They can still change in future versions. + * Some of them are planned to remain in the static_only section indefinitely. + * Some of them might be removed in the future (especially when redundant with existing stable functions) + * ***************************************************************************************/ + +#define ZSTD_FRAMEHEADERSIZE_PREFIX(format) ((format) == ZSTD_f_zstd1 ? 5 : 1) /* minimum input size required to query frame header size */ +#define ZSTD_FRAMEHEADERSIZE_MIN(format) ((format) == ZSTD_f_zstd1 ? 6 : 2) +#define ZSTD_FRAMEHEADERSIZE_MAX 18 /* can be useful for static allocation */ +#define ZSTD_SKIPPABLEHEADERSIZE 8 + +/* compression parameter bounds */ +#define ZSTD_WINDOWLOG_MAX_32 30 +#define ZSTD_WINDOWLOG_MAX_64 31 +#define ZSTD_WINDOWLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_WINDOWLOG_MAX_32 : ZSTD_WINDOWLOG_MAX_64)) +#define ZSTD_WINDOWLOG_MIN 10 +#define ZSTD_HASHLOG_MAX ((ZSTD_WINDOWLOG_MAX < 30) ? ZSTD_WINDOWLOG_MAX : 30) +#define ZSTD_HASHLOG_MIN 6 +#define ZSTD_CHAINLOG_MAX_32 29 +#define ZSTD_CHAINLOG_MAX_64 30 +#define ZSTD_CHAINLOG_MAX ((int)(sizeof(size_t) == 4 ? ZSTD_CHAINLOG_MAX_32 : ZSTD_CHAINLOG_MAX_64)) +#define ZSTD_CHAINLOG_MIN ZSTD_HASHLOG_MIN +#define ZSTD_SEARCHLOG_MAX (ZSTD_WINDOWLOG_MAX-1) +#define ZSTD_SEARCHLOG_MIN 1 +#define ZSTD_MINMATCH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */ +#define ZSTD_MINMATCH_MIN 3 /* only for ZSTD_btopt+, faster strategies are limited to 4 */ +#define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX +#define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */ +#define ZSTD_STRATEGY_MIN ZSTD_fast +#define ZSTD_STRATEGY_MAX ZSTD_btultra2 + + +#define ZSTD_OVERLAPLOG_MIN 0 +#define ZSTD_OVERLAPLOG_MAX 9 + +#define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame + * requiring larger than (1< 0: + * If litLength != 0: + * rep == 1 --> offset == repeat_offset_1 + * rep == 2 --> offset == repeat_offset_2 + * rep == 3 --> offset == repeat_offset_3 + * If litLength == 0: + * rep == 1 --> offset == repeat_offset_2 + * rep == 2 --> offset == repeat_offset_3 + * rep == 3 --> offset == repeat_offset_1 - 1 + * + * Note: This field is optional. ZSTD_generateSequences() will calculate the value of + * 'rep', but repeat offsets do not necessarily need to be calculated from an external + * sequence provider's perspective. For example, ZSTD_compressSequences() does not + * use this 'rep' field at all (as of now). + */ +} ZSTD_Sequence; + +typedef struct { + unsigned windowLog; /**< largest match distance : larger == more compression, more memory needed during decompression */ + unsigned chainLog; /**< fully searched segment : larger == more compression, slower, more memory (useless for fast) */ + unsigned hashLog; /**< dispatch table : larger == faster, more memory */ + unsigned searchLog; /**< nb of searches : larger == more compression, slower */ + unsigned minMatch; /**< match length searched : larger == faster decompression, sometimes less compression */ + unsigned targetLength; /**< acceptable match size for optimal parser (only) : larger == more compression, slower */ + ZSTD_strategy strategy; /**< see ZSTD_strategy definition above */ +} ZSTD_compressionParameters; + +typedef struct { + int contentSizeFlag; /**< 1: content size will be in frame header (when known) */ + int checksumFlag; /**< 1: generate a 32-bits checksum using XXH64 algorithm at end of frame, for error detection */ + int noDictIDFlag; /**< 1: no dictID will be saved into frame header (dictID is only useful for dictionary compression) */ +} ZSTD_frameParameters; + +typedef struct { + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; +} ZSTD_parameters; + +typedef enum { + ZSTD_dct_auto = 0, /* dictionary is "full" when starting with ZSTD_MAGIC_DICTIONARY, otherwise it is "rawContent" */ + ZSTD_dct_rawContent = 1, /* ensures dictionary is always loaded as rawContent, even if it starts with ZSTD_MAGIC_DICTIONARY */ + ZSTD_dct_fullDict = 2 /* refuses to load a dictionary if it does not respect Zstandard's specification, starting with ZSTD_MAGIC_DICTIONARY */ +} ZSTD_dictContentType_e; + +typedef enum { + ZSTD_dlm_byCopy = 0, /**< Copy dictionary content internally */ + ZSTD_dlm_byRef = 1 /**< Reference dictionary content -- the dictionary buffer must outlive its users. */ +} ZSTD_dictLoadMethod_e; + +typedef enum { + ZSTD_f_zstd1 = 0, /* zstd frame format, specified in zstd_compression_format.md (default) */ + ZSTD_f_zstd1_magicless = 1 /* Variant of zstd frame format, without initial 4-bytes magic number. + * Useful to save 4 bytes per generated frame. + * Decoder cannot recognise automatically this format, requiring this instruction. */ +} ZSTD_format_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_forceIgnoreChecksum */ + ZSTD_d_validateChecksum = 0, + ZSTD_d_ignoreChecksum = 1 +} ZSTD_forceIgnoreChecksum_e; + +typedef enum { + /* Note: this enum controls ZSTD_d_refMultipleDDicts */ + ZSTD_rmd_refSingleDDict = 0, + ZSTD_rmd_refMultipleDDicts = 1 +} ZSTD_refMultipleDDicts_e; + +typedef enum { + /* Note: this enum and the behavior it controls are effectively internal + * implementation details of the compressor. They are expected to continue + * to evolve and should be considered only in the context of extremely + * advanced performance tuning. + * + * Zstd currently supports the use of a CDict in three ways: + * + * - The contents of the CDict can be copied into the working context. This + * means that the compression can search both the dictionary and input + * while operating on a single set of internal tables. This makes + * the compression faster per-byte of input. However, the initial copy of + * the CDict's tables incurs a fixed cost at the beginning of the + * compression. For small compressions (< 8 KB), that copy can dominate + * the cost of the compression. + * + * - The CDict's tables can be used in-place. In this model, compression is + * slower per input byte, because the compressor has to search two sets of + * tables. However, this model incurs no start-up cost (as long as the + * working context's tables can be reused). For small inputs, this can be + * faster than copying the CDict's tables. + * + * - The CDict's tables are not used at all, and instead we use the working + * context alone to reload the dictionary and use params based on the source + * size. See ZSTD_compress_insertDictionary() and ZSTD_compress_usingDict(). + * This method is effective when the dictionary sizes are very small relative + * to the input size, and the input size is fairly large to begin with. + * + * Zstd has a simple internal heuristic that selects which strategy to use + * at the beginning of a compression. However, if experimentation shows that + * Zstd is making poor choices, it is possible to override that choice with + * this enum. + */ + ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */ + ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */ + ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ + ZSTD_dictForceLoad = 3 /* Always reload the dictionary */ +} ZSTD_dictAttachPref_e; + +typedef enum { + ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. + * Negative compression levels will be uncompressed, and positive compression + * levels will be compressed. */ + ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be + * emitted if Huffman compression is not profitable. */ + ZSTD_lcm_uncompressed = 2 /**< Always emit uncompressed literals. */ +} ZSTD_literalCompressionMode_e; + + +/*************************************** +* Frame size functions +***************************************/ + +/*! ZSTD_findDecompressedSize() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - decompressed size of all data in all successive frames + * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : decompressed size is an optional field, that may not be present, especially in streaming mode. + * When `return==ZSTD_CONTENTSIZE_UNKNOWN`, data to decompress could be any size. + * In which case, it's necessary to use streaming mode to decompress data. + * note 2 : decompressed size is always present when compression is done with ZSTD_compress() + * note 3 : decompressed size can be very large (64-bits value), + * potentially larger than what local system can handle as a single memory segment. + * In which case, it's necessary to use streaming mode to decompress data. + * note 4 : If source is untrusted, decompressed size could be wrong or intentionally modified. + * Always ensure result fits within application's authorized limits. + * Each application can set its own limits. + * note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to + * read each contained frame header. This is fast as most of the data is skipped, + * however it does mean that all frame data must be present and valid. */ +ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); + +/*! ZSTD_decompressBound() : + * `src` should point to the start of a series of ZSTD encoded and/or skippable frames + * `srcSize` must be the _exact_ size of this series + * (i.e. there should be a frame boundary at `src + srcSize`) + * @return : - upper-bound for the decompressed size of all data in all successive frames + * - if an error occurred: ZSTD_CONTENTSIZE_ERROR + * + * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. + * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. + * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. + * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: + * upper-bound = # blocks * min(128 KB, Window_Size) + */ +ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); + +/*! ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize); + +typedef enum { + ZSTD_sf_noBlockDelimiters = 0, /* Representation of ZSTD_Sequence has no block delimiters, sequences only */ + ZSTD_sf_explicitBlockDelimiters = 1 /* Representation of ZSTD_Sequence contains explicit block delimiters */ +} ZSTD_sequenceFormat_e; + +/*! ZSTD_generateSequences() : + * Generate sequences using ZSTD_compress2, given a source buffer. + * + * Each block will end with a dummy sequence + * with offset == 0, matchLength == 0, and litLength == length of last literals. + * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0) + * simply acts as a block delimiter. + * + * zc can be used to insert custom compression params. + * This function invokes ZSTD_compress2 + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters + * @return : number of sequences generated + */ + +ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize); + +/*! ZSTD_mergeBlockDelimiters() : + * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals + * by merging them into into the literals of the next sequence. + * + * As such, the final generated result has no explicit representation of block boundaries, + * and the final last literals segment is not represented in the sequences. + * + * The output of this function can be fed into ZSTD_compressSequences() with CCtx + * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters + * @return : number of sequences left after merging + */ +ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize); + +/*! ZSTD_compressSequences() : + * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst. + * If a dictionary is included, then the cctx should reference the dict. (see: ZSTD_CCtx_refCDict(), ZSTD_CCtx_loadDictionary(), etc.) + * The entire source is compressed into a single frame. + * + * The compression behavior changes based on cctx params. In particular: + * If ZSTD_c_blockDelimiters == ZSTD_sf_noBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * no block delimiters (defined in ZSTD_Sequence). Block boundaries are roughly determined based on + * the block size derived from the cctx, and sequences may be split. This is the default setting. + * + * If ZSTD_c_blockDelimiters == ZSTD_sf_explicitBlockDelimiters, the array of ZSTD_Sequence is expected to contain + * block delimiters (defined in ZSTD_Sequence). Behavior is undefined if no block delimiters are provided. + * + * If ZSTD_c_validateSequences == 0, this function will blindly accept the sequences provided. Invalid sequences cause undefined + * behavior. If ZSTD_c_validateSequences == 1, then if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and return an error. + * + * In addition to the two adjustable experimental params, there are other important cctx params. + * - ZSTD_c_minMatch MUST be set as less than or equal to the smallest match generated by the match finder. It has a minimum value of ZSTD_MINMATCH_MIN. + * - ZSTD_c_compressionLevel accordingly adjusts the strength of the entropy coder, as it would in typical compression. + * - ZSTD_c_windowLog affects offset validation: this function will return an error at higher debug levels if a provided offset + * is larger than what the spec allows for a given window log and dictionary (if present). See: doc/zstd_compression_format.md + * + * Note: Repcodes are, as of now, always re-calculated within this function, so ZSTD_Sequence::rep is unused. + * Note 2: Once we integrate ability to ingest repcodes, the explicit block delims mode must respect those repcodes exactly, + * and cannot emit an RLE block that disagrees with the repcode history + * @return : final compressed size or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize); + + +/*! ZSTD_writeSkippableFrame() : + * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer. + * + * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number, + * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15. + * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so + * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant. + * + * Returns an error if destination buffer is not large enough, if the source size is not representable + * with a 4-byte unsigned int, or if the parameter magicVariant is greater than 15 (and therefore invalid). + * + * @return : number of bytes written or a ZSTD error. + */ +ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant); + + +/*************************************** +* Memory management +***************************************/ + +/*! ZSTD_estimate*() : + * These functions make it possible to estimate memory usage + * of a future {D,C}Ctx, before its creation. + * + * ZSTD_estimateCCtxSize() will provide a memory budget large enough + * for any compression level up to selected one. + * Note : Unlike ZSTD_estimateCStreamSize*(), this estimate + * does not include space for a window buffer. + * Therefore, the estimation is only guaranteed for single-shot compressions, not streaming. + * The estimate will assume the input may be arbitrarily large, + * which is the worst case. + * + * When srcSize can be bound by a known and rather "small" value, + * this fact can be used to provide a tighter estimation + * because the CCtx compression context will need less memory. + * This tighter estimation can be provided by more advanced functions + * ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(), + * and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter(). + * Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits. + * + * Note 2 : only single-threaded compression is supported. + * ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1. + */ +ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); + +/*! ZSTD_estimateCStreamSize() : + * ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one. + * It will also consider src size to be arbitrarily "large", which is worst case. + * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. + * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. + * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. + * Note : CStream size estimation is only correct for single-threaded compression. + * ZSTD_DStream memory budget depends on window Size. + * This information can be passed manually, using ZSTD_estimateDStreamSize, + * or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame(); + * Note : if streaming is init with function ZSTD_init?Stream_usingDict(), + * an internal ?Dict will be created, which additional size is not estimated here. + * In this case, get total size by adding ZSTD_estimate?DictSize */ +ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams); +ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize); +ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize); + +/*! ZSTD_estimate?DictSize() : + * ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict(). + * ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced(). + * Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller. + */ +ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod); +ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod); + +/*! ZSTD_initStatic*() : + * Initialize an object using a pre-allocated fixed-size buffer. + * workspace: The memory area to emplace the object into. + * Provided pointer *must be 8-bytes aligned*. + * Buffer must outlive object. + * workspaceSize: Use ZSTD_estimate*Size() to determine + * how large workspace must be to support target scenario. + * @return : pointer to object (same address as workspace, just different type), + * or NULL if error (size too small, incorrect alignment, etc.) + * Note : zstd will never resize nor malloc() when using a static buffer. + * If the object requires more memory than available, + * zstd will just error out (typically ZSTD_error_memory_allocation). + * Note 2 : there is no corresponding "free" function. + * Since workspace is allocated externally, it must be freed externally too. + * Note 3 : cParams : use ZSTD_getCParams() to convert a compression level + * into its associated cParams. + * Limitation 1 : currently not compatible with internal dictionary creation, triggered by + * ZSTD_CCtx_loadDictionary(), ZSTD_initCStream_usingDict() or ZSTD_initDStream_usingDict(). + * Limitation 2 : static cctx currently not compatible with multi-threading. + * Limitation 3 : static dctx is incompatible with legacy support. + */ +ZSTDLIB_API ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticCCtx() */ + +ZSTDLIB_API ZSTD_DCtx* ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize); +ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize); /**< same as ZSTD_initStaticDCtx() */ + +ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams); + +ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType); + + +/*! Custom memory allocation : + * These prototypes make it possible to pass your own allocation/free functions. + * ZSTD_customMem is provided at creation time, using ZSTD_create*_advanced() variants listed below. + * All allocation/free operations will be completed using these custom variants instead of regular ones. + */ +typedef void* (*ZSTD_allocFunction) (void* opaque, size_t size); +typedef void (*ZSTD_freeFunction) (void* opaque, void* address); +typedef struct { ZSTD_allocFunction customAlloc; ZSTD_freeFunction customFree; void* opaque; } ZSTD_customMem; +static +__attribute__((__unused__)) +ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL }; /**< this constant defers to stdlib's functions */ + +ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem); +ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem); + +/* ! Thread pool : + * These prototypes make it possible to share a thread pool among multiple compression contexts. + * This can limit resources for applications with multiple threads where each one uses + * a threaded compression mode (via ZSTD_c_nbWorkers parameter). + * ZSTD_createThreadPool creates a new thread pool with a given number of threads. + * Note that the lifetime of such pool must exist while being used. + * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value + * to use an internal thread pool). + * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer. + */ +typedef struct POOL_ctx_s ZSTD_threadPool; +ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads); +ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool); /* accept NULL pointer */ +ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool); + + +/* + * This API is temporary and is expected to change or disappear in the future! + */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* cctxParams, + ZSTD_customMem customMem); + +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem); + + +/*************************************** +* Advanced compression functions +***************************************/ + +/*! ZSTD_createCDict_byReference() : + * Create a digested dictionary for compression + * Dictionary content is just referenced, not duplicated. + * As a consequence, `dictBuffer` **must** outlive CDict, + * and its content must remain unmodified throughout the lifetime of CDict. + * note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */ +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel); + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict); + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. + * `estimatedSrcSize` value is optional, select 0 if not known */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_getParams() : + * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. + * All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */ +ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); + +/*! ZSTD_checkCParams() : + * Ensure param values remain within authorized range. + * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ +ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); + +/*! ZSTD_adjustCParams() : + * optimize params for a given `srcSize` and `dictSize`. + * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. + * `dictSize` must be `0` when there is no dictionary. + * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. + * This function never fails (wide contract) */ +ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); + +/*! ZSTD_compress_advanced() : + * Note : this function is now DEPRECATED. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */ +ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params); + +/*! ZSTD_compress_usingCDict_advanced() : + * Note : this function is now REDUNDANT. + * It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters. + * This prototype will be marked as deprecated and generate compilation warning in some future version */ +ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams); + + +/*! ZSTD_CCtx_loadDictionary_byReference() : + * Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx. + * It saves some memory, but also requires that `dict` outlives its usage within `cctx` */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize); + +/*! ZSTD_CCtx_loadDictionary_advanced() : + * Same as ZSTD_CCtx_loadDictionary(), but gives finer control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_CCtx_refPrefix_advanced() : + * Same as ZSTD_CCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/* === experimental parameters === */ +/* these parameters can be used with ZSTD_setParameter() + * they are not guaranteed to remain supported in the future */ + + /* Enables rsyncable mode, + * which makes compressed files more rsync friendly + * by adding periodic synchronization points to the compressed data. + * The target average block size is ZSTD_c_jobSize / 2. + * It's possible to modify the job size to increase or decrease + * the granularity of the synchronization point. + * Once the jobSize is smaller than the window size, + * it will result in compression ratio degradation. + * NOTE 1: rsyncable mode only works when multithreading is enabled. + * NOTE 2: rsyncable performs poorly in combination with long range mode, + * since it will decrease the effectiveness of synchronization points, + * though mileage may vary. + * NOTE 3: Rsyncable mode limits maximum compression speed to ~400 MB/s. + * If the selected compression level is already running significantly slower, + * the overall speed won't be significantly impacted. + */ + #define ZSTD_c_rsyncable ZSTD_c_experimentalParam1 + +/* Select a compression format. + * The value must be of type ZSTD_format_e. + * See ZSTD_format_e enum definition for details */ +#define ZSTD_c_format ZSTD_c_experimentalParam2 + +/* Force back-reference distances to remain < windowSize, + * even when referencing into Dictionary content (default:0) */ +#define ZSTD_c_forceMaxWindow ZSTD_c_experimentalParam3 + +/* Controls whether the contents of a CDict + * are used in place, or copied into the working context. + * Accepts values from the ZSTD_dictAttachPref_e enum. + * See the comments on that enum for an explanation of the feature. */ +#define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 + +/* Controls how the literals are compressed (default is auto). + * The value must be of type ZSTD_literalCompressionMode_e. + * See ZSTD_literalCompressionMode_t enum definition for details. + */ +#define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 + +/* Tries to fit compressed block size to be around targetCBlockSize. + * No target when targetCBlockSize == 0. + * There is no guarantee on compressed block size (default:0) */ +#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 + +/* User's best guess of source size. + * Hint is not valid when srcSizeHint == 0. + * There is no guarantee that hint is close to actual source size, + * but compression ratio may regress significantly if guess considerably underestimates */ +#define ZSTD_c_srcSizeHint ZSTD_c_experimentalParam7 + +/* Controls whether the new and experimental "dedicated dictionary search + * structure" can be used. This feature is still rough around the edges, be + * prepared for surprising behavior! + * + * How to use it: + * + * When using a CDict, whether to use this feature or not is controlled at + * CDict creation, and it must be set in a CCtxParams set passed into that + * construction (via ZSTD_createCDict_advanced2()). A compression will then + * use the feature or not based on how the CDict was constructed; the value of + * this param, set in the CCtx, will have no effect. + * + * However, when a dictionary buffer is passed into a CCtx, such as via + * ZSTD_CCtx_loadDictionary(), this param can be set on the CCtx to control + * whether the CDict that is created internally can use the feature or not. + * + * What it does: + * + * Normally, the internal data structures of the CDict are analogous to what + * would be stored in a CCtx after compressing the contents of a dictionary. + * To an approximation, a compression using a dictionary can then use those + * data structures to simply continue what is effectively a streaming + * compression where the simulated compression of the dictionary left off. + * Which is to say, the search structures in the CDict are normally the same + * format as in the CCtx. + * + * It is possible to do better, since the CDict is not like a CCtx: the search + * structures are written once during CDict creation, and then are only read + * after that, while the search structures in the CCtx are both read and + * written as the compression goes along. This means we can choose a search + * structure for the dictionary that is read-optimized. + * + * This feature enables the use of that different structure. + * + * Note that some of the members of the ZSTD_compressionParameters struct have + * different semantics and constraints in the dedicated search structure. It is + * highly recommended that you simply set a compression level in the CCtxParams + * you pass into the CDict creation call, and avoid messing with the cParams + * directly. + * + * Effects: + * + * This will only have any effect when the selected ZSTD_strategy + * implementation supports this feature. Currently, that's limited to + * ZSTD_greedy, ZSTD_lazy, and ZSTD_lazy2. + * + * Note that this means that the CDict tables can no longer be copied into the + * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be + * useable. The dictionary can only be attached or reloaded. + * + * In general, you should expect compression to be faster--sometimes very much + * so--and CDict creation to be slightly slower. Eventually, we will probably + * make this mode the default. + */ +#define ZSTD_c_enableDedicatedDictSearch ZSTD_c_experimentalParam8 + +/* ZSTD_c_stableInBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the compressor that the ZSTD_inBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the compressor, and + * compression will fail if it ever changes. This means the only flush + * mode that makes sense is ZSTD_e_end, so zstd will error if ZSTD_e_end + * is not used. The data in the ZSTD_inBuffer in the range [src, src + pos) + * MUST not be modified during compression or you will get data corruption. + * + * When this flag is enabled zstd won't allocate an input window buffer, + * because the user guarantees it can reference the ZSTD_inBuffer until + * the frame is complete. But, it will still allocate an output buffer + * large enough to fit a block (see ZSTD_c_stableOutBuffer). This will also + * avoid the memcpy() from the input buffer to the input window buffer. + * + * NOTE: ZSTD_compressStream2() will error if ZSTD_e_end is not used. + * That means this flag cannot be used with ZSTD_compressStream(). + * + * NOTE: So long as the ZSTD_inBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, compression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_inBuffer in the range [dst, dst + pos) MUST + * not be modified during compression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_inBuffer to find + * matches. Normally zstd maintains its own window buffer for this purpose, + * but passing this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_c_stableInBuffer ZSTD_c_experimentalParam9 + +/* ZSTD_c_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells he compressor that the ZSTD_outBuffer will not be resized between + * calls. Specifically: (out.size - out.pos) will never grow. This gives the + * compressor the freedom to say: If the compressed data doesn't fit in the + * output buffer then return ZSTD_error_dstSizeTooSmall. This allows us to + * always decompress directly into the output buffer, instead of decompressing + * into an internal buffer and copying to the output buffer. + * + * When this flag is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer. It will still allocate the + * input window buffer (see ZSTD_c_stableInBuffer). + * + * Zstd will check that (out.size - out.pos) never grows and return an error + * if it does. While not strictly necessary, this should prevent surprises. + */ +#define ZSTD_c_stableOutBuffer ZSTD_c_experimentalParam10 + +/* ZSTD_c_blockDelimiters + * Default is 0 == ZSTD_sf_noBlockDelimiters. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * + * Designates whether or not the given array of ZSTD_Sequence contains block delimiters + * and last literals, which are defined as sequences with offset == 0 and matchLength == 0. + * See the definition of ZSTD_Sequence for more specifics. + */ +#define ZSTD_c_blockDelimiters ZSTD_c_experimentalParam11 + +/* ZSTD_c_validateSequences + * Default is 0 == disabled. Set to 1 to enable sequence validation. + * + * For use with sequence compression API: ZSTD_compressSequences(). + * Designates whether or not we validate sequences provided to ZSTD_compressSequences() + * during function execution. + * + * Without validation, providing a sequence that does not conform to the zstd spec will cause + * undefined behavior, and may produce a corrupted block. + * + * With validation enabled, a if sequence is invalid (see doc/zstd_compression_format.md for + * specifics regarding offset/matchlength requirements) then the function will bail out and + * return an error. + * + */ +#define ZSTD_c_validateSequences ZSTD_c_experimentalParam12 + +/*! ZSTD_CCtx_getParameter() : + * Get the requested compression parameter value, selected by enum ZSTD_cParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value); + + +/*! ZSTD_CCtx_params : + * Quick howto : + * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure + * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into + * an existing ZSTD_CCtx_params structure. + * This is similar to + * ZSTD_CCtx_setParameter(). + * - ZSTD_CCtx_setParametersUsingCCtxParams() : Apply parameters to + * an existing CCtx. + * These parameters will be applied to + * all subsequent frames. + * - ZSTD_compressStream2() : Do compression using the CCtx. + * - ZSTD_freeCCtxParams() : Free the memory, accept NULL pointer. + * + * This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams() + * for static allocation of CCtx for single-threaded compression. + */ +ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void); +ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params); /* accept NULL pointer */ + +/*! ZSTD_CCtxParams_reset() : + * Reset params to default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params); + +/*! ZSTD_CCtxParams_init() : + * Initializes the compression parameters of cctxParams according to + * compression level. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel); + +/*! ZSTD_CCtxParams_init_advanced() : + * Initializes the compression and frame parameters of cctxParams according to + * params. All other parameters are reset to their default values. + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); + +/*! ZSTD_CCtxParams_setParameter() : + * Similar to ZSTD_CCtx_setParameter. + * Set one compression parameter, selected by enum ZSTD_cParameter. + * Parameters must be applied to a ZSTD_CCtx using + * ZSTD_CCtx_setParametersUsingCCtxParams(). + * @result : a code representing success or failure (which can be tested with + * ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); + +/*! ZSTD_CCtxParams_getParameter() : + * Similar to ZSTD_CCtx_getParameter. + * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. + * @result : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); + +/*! ZSTD_CCtx_setParametersUsingCCtxParams() : + * Apply a set of ZSTD_CCtx_params to the compression context. + * This can be done even after compression is started, + * if nbWorkers==0, this will have no impact until a new compression is started. + * if nbWorkers>=1, new parameters will be picked up at next job, + * with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated). + */ +ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params); + +/*! ZSTD_compressStream2_simpleArgs() : + * Same as ZSTD_compressStream2(), + * but using only integral types as arguments. + * This variant might be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp); + + +/*************************************** +* Advanced decompression functions +***************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, ready to start decompression operation without startup delay. + * Dictionary content is referenced, and therefore stays in dictBuffer. + * It is important that dictBuffer outlives DDict, + * it must remain read accessible throughout the lifetime of DDict */ +ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_byReference() : + * Same as ZSTD_DCtx_loadDictionary(), + * but references `dict` content instead of copying it into `dctx`. + * This saves memory if `dict` remains around., + * However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); + +/*! ZSTD_DCtx_loadDictionary_advanced() : + * Same as ZSTD_DCtx_loadDictionary(), + * but gives direct control over + * how to load the dictionary (by copy ? by reference ?) + * and how to interpret it (automatic ? force raw mode ? full mode only ?). */ +ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_refPrefix_advanced() : + * Same as ZSTD_DCtx_refPrefix(), but gives finer control over + * how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */ +ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType); + +/*! ZSTD_DCtx_setMaxWindowSize() : + * Refuses allocating internal buffers for frames requiring a window size larger than provided limit. + * This protects a decoder context from reserving too much memory for itself (potential attack scenario). + * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. + * By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + * @return : 0, or an error code (which can be tested using ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize); + +/*! ZSTD_DCtx_getParameter() : + * Get the requested decompression parameter value, selected by enum ZSTD_dParameter, + * and store it into int* value. + * @return : 0, or an error code (which can be tested with ZSTD_isError()). + */ +ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value); + +/* ZSTD_d_format + * experimental parameter, + * allowing selection between ZSTD_format_e input compression formats + */ +#define ZSTD_d_format ZSTD_d_experimentalParam1 +/* ZSTD_d_stableOutBuffer + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable. + * + * Tells the decompressor that the ZSTD_outBuffer will ALWAYS be the same + * between calls, except for the modifications that zstd makes to pos (the + * caller must not modify pos). This is checked by the decompressor, and + * decompression will fail if it ever changes. Therefore the ZSTD_outBuffer + * MUST be large enough to fit the entire decompressed frame. This will be + * checked when the frame content size is known. The data in the ZSTD_outBuffer + * in the range [dst, dst + pos) MUST not be modified during decompression + * or you will get data corruption. + * + * When this flags is enabled zstd won't allocate an output buffer, because + * it can write directly to the ZSTD_outBuffer, but it will still allocate + * an input buffer large enough to fit any compressed block. This will also + * avoid the memcpy() from the internal output buffer to the ZSTD_outBuffer. + * If you need to avoid the input buffer allocation use the buffer-less + * streaming API. + * + * NOTE: So long as the ZSTD_outBuffer always points to valid memory, using + * this flag is ALWAYS memory safe, and will never access out-of-bounds + * memory. However, decompression WILL fail if you violate the preconditions. + * + * WARNING: The data in the ZSTD_outBuffer in the range [dst, dst + pos) MUST + * not be modified during decompression or you will get data corruption. This + * is because zstd needs to reference data in the ZSTD_outBuffer to regenerate + * matches. Normally zstd maintains its own buffer for this purpose, but passing + * this flag tells zstd to use the user provided buffer. + */ +#define ZSTD_d_stableOutBuffer ZSTD_d_experimentalParam2 + +/* ZSTD_d_forceIgnoreChecksum + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * Tells the decompressor to skip checksum validation during decompression, regardless + * of whether checksumming was specified during compression. This offers some + * slight performance benefits, and may be useful for debugging. + * Param has values of type ZSTD_forceIgnoreChecksum_e + */ +#define ZSTD_d_forceIgnoreChecksum ZSTD_d_experimentalParam3 + +/* ZSTD_d_refMultipleDDicts + * Experimental parameter. + * Default is 0 == disabled. Set to 1 to enable + * + * If enabled and dctx is allocated on the heap, then additional memory will be allocated + * to store references to multiple ZSTD_DDict. That is, multiple calls of ZSTD_refDDict() + * using a given ZSTD_DCtx, rather than overwriting the previous DDict reference, will instead + * store all references. At decompression time, the appropriate dictID is selected + * from the set of DDicts based on the dictID in the frame. + * + * Usage is simply calling ZSTD_refDDict() on multiple dict buffers. + * + * Param has values of byte ZSTD_refMultipleDDicts_e + * + * WARNING: Enabling this parameter and calling ZSTD_DCtx_refDDict(), will trigger memory + * allocation for the hash table. ZSTD_freeDCtx() also frees this memory. + * Memory is allocated as per ZSTD_DCtx::customMem. + * + * Although this function allocates memory for the table, the user is still responsible for + * memory management of the underlying ZSTD_DDict* themselves. + */ +#define ZSTD_d_refMultipleDDicts ZSTD_d_experimentalParam4 + + +/*! ZSTD_DCtx_setFormat() : + * Instruct the decoder context about what kind of data to decode next. + * This instruction is mandatory to decode data without a fully-formed header, + * such ZSTD_f_zstd1_magicless for example. + * @return : 0, or an error code (which can be tested using ZSTD_isError()). */ +ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format); + +/*! ZSTD_decompressStream_simpleArgs() : + * Same as ZSTD_decompressStream(), + * but using only integral types as arguments. + * This can be helpful for binders from dynamic languages + * which have troubles handling structures containing memory pointers. + */ +ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos); + + +/******************************************************************** +* Advanced streaming functions +* Warning : most of these functions are now redundant with the Advanced API. +* Once Advanced API reaches "stable" status, +* redundant functions will be deprecated, and then at some point removed. +********************************************************************/ + +/*===== Advanced Streaming compression functions =====*/ + +/*! ZSTD_initCStream_srcSize() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * pledgedSrcSize must be correct. If it is not known at init time, use + * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, + * "0" also disables frame content size field. It may be enabled in the future. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, + int compressionLevel, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingDict() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * Creates of an internal CDict (incompatible with static CCtx), except if + * dict == NULL or dictSize < 8, in which case no dict is used. + * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if + * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + int compressionLevel); + +/*! ZSTD_initCStream_advanced() : + * This function is deprecated, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd parameter and leave the rest as-is. + * for ((param, value) : params) { + * ZSTD_CCtx_setParameter(zcs, param, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); + * + * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy. + * pledgedSrcSize must be correct. + * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, + unsigned long long pledgedSrcSize); + +/*! ZSTD_initCStream_usingCDict() : + * This function is deprecated, and equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * note : cdict will just be referenced, and must outlive compression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); + +/*! ZSTD_initCStream_usingCDict_advanced() : + * This function is DEPRECATED, and is approximately equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * // Pseudocode: Set each zstd frame parameter and leave the rest as-is. + * for ((fParam, value) : fParams) { + * ZSTD_CCtx_setParameter(zcs, fParam, value); + * } + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * ZSTD_CCtx_refCDict(zcs, cdict); + * + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. + * pledgedSrcSize must be correct. If srcSize is not known at init time, use + * value ZSTD_CONTENTSIZE_UNKNOWN. + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t +ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize); + +/*! ZSTD_resetCStream() : + * This function is deprecated, and is equivalent to: + * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); + * + * start a new frame, using same parameters from previous frame. + * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. + * Note that zcs must be init at least once before using ZSTD_resetCStream(). + * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN. + * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end. + * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs, + * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead. + * @return : 0, or an error code (which can be tested using ZSTD_isError()) + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize); + + +typedef struct { + unsigned long long ingested; /* nb input bytes read and buffered */ + unsigned long long consumed; /* nb input bytes actually compressed */ + unsigned long long produced; /* nb of compressed bytes generated and buffered */ + unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */ + unsigned currentJobID; /* MT only : latest started job nb */ + unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */ +} ZSTD_frameProgression; + +/* ZSTD_getFrameProgression() : + * tells how much data has been ingested (read from input) + * consumed (input actually compressed) and produced (output) for current frame. + * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed. + * Aggregates progression inside active worker threads. + */ +ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx); + +/*! ZSTD_toFlushNow() : + * Tell how many bytes are ready to be flushed immediately. + * Useful for multithreading scenarios (nbWorkers >= 1). + * Probe the oldest active job, defined as oldest job not yet entirely flushed, + * and check its output buffer. + * @return : amount of data stored in oldest job and ready to be flushed immediately. + * if @return == 0, it means either : + * + there is no active job (could be checked with ZSTD_frameProgression()), or + * + oldest job is still actively compressing data, + * but everything it has produced has also been flushed so far, + * therefore flush speed is limited by production speed of oldest job + * irrespective of the speed of concurrent (and newer) jobs. + */ +ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx); + + +/*===== Advanced Streaming decompression functions =====*/ + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); + * + * note: no dictionary will be used if dict == NULL or dictSize < 8 + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * ZSTD_DCtx_refDDict(zds, ddict); + * + * note : ddict is referenced, it must outlive decompression session + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); + +/*! + * This function is deprecated, and is equivalent to: + * + * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); + * + * re-use decompression parameters from previous init; saves dictionary loading + * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x + */ +ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); + + +/********************************************************************* +* Buffer-less and synchronous inner streaming functions +* +* This is an advanced API, giving full control over buffer management, for users which need direct control over memory. +* But it's also a complex one, with several restrictions, documented below. +* Prefer normal streaming API for an easier experience. +********************************************************************* */ + +/** + Buffer-less streaming compression (synchronous mode) + + A ZSTD_CCtx object is required to track streaming operations. + Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource. + ZSTD_CCtx object can be re-used multiple times within successive compression operations. + + Start by initializing a context. + Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression, + or ZSTD_compressBegin_advanced(), for finer parameter control. + It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx() + + Then, consume your input using ZSTD_compressContinue(). + There are some important considerations to keep in mind when using this advanced function : + - ZSTD_compressContinue() has no internal buffer. It uses externally provided buffers only. + - Interface is synchronous : input is consumed entirely and produces 1+ compressed blocks. + - Caller must ensure there is enough space in `dst` to store compressed data under worst case scenario. + Worst case evaluation is provided by ZSTD_compressBound(). + ZSTD_compressContinue() doesn't guarantee recover after a failed compression. + - ZSTD_compressContinue() presumes prior input ***is still accessible and unmodified*** (up to maximum distance size, see WindowLog). + It remembers all previous contiguous blocks, plus one separated memory segment (which can itself consists of multiple contiguous blocks) + - ZSTD_compressContinue() detects that prior input has been overwritten when `src` buffer overlaps. + In which case, it will "discard" the relevant memory section from its history. + + Finish a frame with ZSTD_compressEnd(), which will write the last block(s) and optional checksum. + It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame. + Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders. + + `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again. +*/ + +/*===== Buffer-less streaming compression functions =====*/ +ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel); +ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /**< note: fails if cdict==NULL */ +ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize); /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */ +ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /**< note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + + +/** + Buffer-less streaming decompression (synchronous mode) + + A ZSTD_DCtx object is required to track streaming operations. + Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it. + A ZSTD_DCtx object can be re-used multiple times. + + First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader(). + Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough. + Data fragment must be large enough to ensure successful decoding. + `ZSTD_frameHeaderSize_max` bytes is guaranteed to always be large enough. + @result : 0 : successful decoding, the `ZSTD_frameHeader` structure is correctly filled. + >0 : `srcSize` is too small, please provide at least @result bytes on next attempt. + errorCode, which can be tested using ZSTD_isError(). + + It fills a ZSTD_frameHeader structure with important information to correctly decode the frame, + such as the dictionary ID, content size, or maximum back-reference distance (`windowSize`). + Note that these values could be wrong, either because of data corruption, or because a 3rd party deliberately spoofs false information. + As a consequence, check that values remain within valid application range. + For example, do not allocate memory blindly, check that `windowSize` is within expectation. + Each application can set its own limits, depending on local restrictions. + For extended interoperability, it is recommended to support `windowSize` of at least 8 MB. + + ZSTD_decompressContinue() needs previous data blocks during decompression, up to `windowSize` bytes. + ZSTD_decompressContinue() is very sensitive to contiguity, + if 2 blocks don't follow each other, make sure that either the compressor breaks contiguity at the same place, + or that previous contiguous segment is large enough to properly handle maximum back-reference distance. + There are multiple ways to guarantee this condition. + + The most memory efficient way is to use a round buffer of sufficient size. + Sufficient size is determined by invoking ZSTD_decodingBufferSize_min(), + which can @return an error code if required value is too large for current system (in 32-bits mode). + In a round buffer methodology, ZSTD_decompressContinue() decompresses each block next to previous one, + up to the moment there is not enough room left in the buffer to guarantee decoding another full block, + which maximum size is provided in `ZSTD_frameHeader` structure, field `blockSizeMax`. + At which point, decoding can resume from the beginning of the buffer. + Note that already decoded data stored in the buffer should be flushed before being overwritten. + + There are alternatives possible, for example using two or more buffers of size `windowSize` each, though they consume more memory. + + Finally, if you control the compression process, you can also ignore all buffer size rules, + as long as the encoder and decoder progress in "lock-step", + aka use exactly the same buffer sizes, break contiguity at the same place, etc. + + Once buffers are setup, start decompression, with ZSTD_decompressBegin(). + If decompression requires a dictionary, use ZSTD_decompressBegin_usingDict() or ZSTD_decompressBegin_usingDDict(). + + Then use ZSTD_nextSrcSizeToDecompress() and ZSTD_decompressContinue() alternatively. + ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as 'srcSize' to ZSTD_decompressContinue(). + ZSTD_decompressContinue() requires this _exact_ amount of bytes, or it will fail. + + @result of ZSTD_decompressContinue() is the number of bytes regenerated within 'dst' (necessarily <= dstCapacity). + It can be zero : it just means ZSTD_decompressContinue() has decoded some metadata item. + It can also be an error code, which can be tested with ZSTD_isError(). + + A frame is fully decoded when ZSTD_nextSrcSizeToDecompress() returns zero. + Context can then be reset to start a new decompression. + + Note : it's possible to know if next input to present is a header or a block, using ZSTD_nextInputType(). + This information is not required to properly decode a frame. + + == Special case : skippable frames == + + Skippable frames allow integration of user-defined data into a flow of concatenated frames. + Skippable frames will be ignored (skipped) by decompressor. + The format of skippable frames is as follows : + a) Skippable frame ID - 4 Bytes, Little endian format, any value from 0x184D2A50 to 0x184D2A5F + b) Frame Size - 4 Bytes, Little endian format, unsigned 32-bits + c) Frame Content - any content (User Data) of length equal to Frame Size + For skippable frames ZSTD_getFrameHeader() returns zfhPtr->frameType==ZSTD_skippableFrame. + For skippable frames ZSTD_decompressContinue() always returns 0 : it only skips the content. +*/ + +/*===== Buffer-less streaming decompression functions =====*/ +typedef enum { ZSTD_frame, ZSTD_skippableFrame } ZSTD_frameType_e; +typedef struct { + unsigned long long frameContentSize; /* if == ZSTD_CONTENTSIZE_UNKNOWN, it means this field is not available. 0 means "empty" */ + unsigned long long windowSize; /* can be very large, up to <= frameContentSize */ + unsigned blockSizeMax; + ZSTD_frameType_e frameType; /* if == ZSTD_skippableFrame, frameContentSize is the size of skippable content */ + unsigned headerSize; + unsigned dictID; + unsigned checksumFlag; +} ZSTD_frameHeader; + +/*! ZSTD_getFrameHeader() : + * decode Frame Header, or requires larger `srcSize`. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */ +/*! ZSTD_getFrameHeader_advanced() : + * same as ZSTD_getFrameHeader(), + * with added capability to select a format (like ZSTD_f_zstd1_magicless) */ +ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format); +ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */ + +ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize); +ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + +ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx); +ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +/* misc */ +ZSTDLIB_API void ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx); +typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e; +ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx); + + + + +/* ============================ */ +/** Block level API */ +/* ============================ */ + +/*! + Block functions produce and decode raw zstd blocks, without frame metadata. + Frame metadata cost is typically ~12 bytes, which can be non-negligible for very small blocks (< 100 bytes). + But users will have to take in charge needed metadata to regenerate data, such as compressed and content sizes. + + A few rules to respect : + - Compressing and decompressing require a context structure + + Use ZSTD_createCCtx() and ZSTD_createDCtx() + - It is necessary to init context before starting + + compression : any ZSTD_compressBegin*() variant, including with dictionary + + decompression : any ZSTD_decompressBegin*() variant, including with dictionary + + copyCCtx() and copyDCtx() can be used too + - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB + + If input is larger than a block size, it's necessary to split input data into multiple blocks + + For inputs larger than a single block, consider using regular ZSTD_compress() instead. + Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. + - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! + ===> In which case, nothing is produced into `dst` ! + + User __must__ test for such outcome and deal directly with uncompressed data + + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. + Doing so would mess up with statistics history, leading to potential data corruption. + + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! + + In case of multiple successive blocks, should some of them be uncompressed, + decoder must be informed of their existence in order to follow proper history. + Use ZSTD_insertBlock() for such a case. +*/ + +/*===== Raw zstd block functions =====*/ +ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx); +ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); +ZSTDLIB_API size_t ZSTD_insertBlock (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */ + + +#endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ + diff --git a/include/media/rc-map.h b/include/media/rc-map.h index 999b750bc6b8..30f138ebab6f 100644 --- a/include/media/rc-map.h +++ b/include/media/rc-map.h @@ -175,6 +175,13 @@ struct rc_map_list { struct rc_map map; }; +#ifdef CONFIG_MEDIA_CEC_RC +/* + * rc_map_list from rc-cec.c + */ +extern struct rc_map_list cec_map; +#endif + /* Routines from rc-map.c */ /** diff --git a/include/net/act_api.h b/include/net/act_api.h index 55dab604861f..2bf3092ae7ec 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -166,6 +166,7 @@ int tcf_idr_create_from_flags(struct tc_action_net *tn, u32 index, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int bind, u32 flags); +void tcf_idr_insert_many(struct tc_action *actions[]); void tcf_idr_cleanup(struct tc_action_net *tn, u32 index); int tcf_idr_check_alloc(struct tc_action_net *tn, u32 *index, struct tc_action **a, int bind); @@ -186,10 +187,13 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, struct tc_action *actions[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack); +struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + bool rtnl_held, + struct netlink_ext_ack *extack); struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, - bool rtnl_held, + struct tc_action_ops *ops, bool rtnl_held, struct netlink_ext_ack *extack); int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind, int ref, bool terse); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index c1504aa3d9cf..ba2f439bc04d 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -238,6 +238,14 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_ERR_DATA_REPORTING, + + /* + * When this quirk is set, then the hci_suspend_notifier is not + * registered. This is intended for devices which drop completely + * from the bus on system-suspend and which will show up as a new + * HCI after resume. + */ + HCI_QUIRK_NO_SUSPEND_NOTIFIER, }; /* HCI device flags */ diff --git a/include/net/dst.h b/include/net/dst.h index 10f0a8399867..8d7cf51766c4 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -533,4 +533,15 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu) dst->ops->update_pmtu(dst, NULL, skb, mtu, false); } +struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie); +void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, bool confirm_neigh); +void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb); +u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old); +struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr); +unsigned int dst_blackhole_mtu(const struct dst_entry *dst); + #endif /* _NET_DST_H */ diff --git a/include/net/icmp.h b/include/net/icmp.h index 9ac2d2672a93..fd84adc47963 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -46,7 +46,11 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 #if IS_ENABLED(CONFIG_NF_NAT) void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info); #else -#define icmp_ndo_send icmp_send +static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) +{ + struct ip_options opts = { 0 }; + __icmp_send(skb_in, type, code, info, &opts); +} #endif int icmp_rcv(struct sk_buff *skb); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 111d7771b208..493b2c3a449f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,9 @@ struct inet_connection_sock { u32 icsk_probes_tstamp; u32 icsk_user_timeout; - u64 icsk_ca_priv[104 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (13 * sizeof(u64)) +/* XXX inflated by temporary internal debugging info */ +#define ICSK_CA_PRIV_SIZE (216) + u64 icsk_ca_priv[ICSK_CA_PRIV_SIZE / sizeof(u64)]; }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ @@ -284,7 +285,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog; } -void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req); void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req); static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 4b6ecf532623..6799f95eea65 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1531,6 +1531,7 @@ struct nft_trans_flowtable { struct nft_flowtable *flowtable; bool update; struct list_head hook_list; + u32 flags; }; #define nft_trans_flowtable(trans) \ @@ -1539,6 +1540,8 @@ struct nft_trans_flowtable { (((struct nft_trans_flowtable *)trans->data)->update) #define nft_trans_flowtable_hooks(trans) \ (((struct nft_trans_flowtable *)trans->data)->hook_list) +#define nft_trans_flowtable_flags(trans) \ + (((struct nft_trans_flowtable *)trans->data)->flags) int __init nft_chain_filter_init(void); void nft_chain_filter_fini(void); diff --git a/include/net/nexthop.h b/include/net/nexthop.h index 226930d66b63..abd620103cec 100644 --- a/include/net/nexthop.h +++ b/include/net/nexthop.h @@ -400,6 +400,7 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel) int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg, struct netlink_ext_ack *extack); +/* Caller should either hold rcu_read_lock(), or RTNL. */ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) { struct nh_info *nhi; @@ -420,6 +421,29 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh) return NULL; } +/* Variant of nexthop_fib6_nh(). + * Caller should either hold rcu_read_lock_bh(), or RTNL. + */ +static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh) +{ + struct nh_info *nhi; + + if (nh->is_group) { + struct nh_group *nh_grp; + + nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp); + nh = nexthop_mpath_select(nh_grp, 0); + if (!nh) + return NULL; + } + + nhi = rcu_dereference_bh_rtnl(nh->nh_info); + if (nhi->family == AF_INET6) + return &nhi->fib6_nh; + + return NULL; +} + static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i) { struct fib6_nh *fib6_nh; diff --git a/include/net/red.h b/include/net/red.h index 932f0d79d60c..9e6647c4ccd1 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -168,7 +168,8 @@ static inline void red_set_vars(struct red_vars *v) v->qcount = -1; } -static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log) +static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, + u8 Scell_log, u8 *stab) { if (fls(qth_min) + Wlog > 32) return false; @@ -178,6 +179,13 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_ return false; if (qth_max < qth_min) return false; + if (stab) { + int i; + + for (i = 0; i < RED_STAB_SIZE; i++) + if (stab[i] >= 32) + return false; + } return true; } diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e2091bb2b3a8..4da61c950e93 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -33,6 +33,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * * @list: Used internally * @kind: Identifier + * @netns_refund: Physical device, move to init_net on netns exit * @maxtype: Highest device specific netlink attribute number * @policy: Netlink policy for device specific attribute validation * @validate: Optional validation function for netlink/changelink parameters @@ -64,6 +65,7 @@ struct rtnl_link_ops { size_t priv_size; void (*setup)(struct net_device *dev); + bool netns_refund; unsigned int maxtype; const struct nla_policy *policy; int (*validate)(struct nlattr *tb[], diff --git a/include/net/tcp.h b/include/net/tcp.h index 25bbada379c4..7847cef7bf24 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -798,6 +798,11 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) return max_t(s64, t1 - t0, 0); } +static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) +{ + return max_t(s32, t1 - t0, 0); +} + static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { return tcp_ns_to_ts(skb->skb_mstamp_ns); @@ -865,16 +870,22 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { +#define TCPCB_DELIVERED_CE_MASK ((1U<<20) - 1) /* There is space for up to 24 bytes */ - __u32 in_flight:30,/* Bytes in flight at transmit */ - is_app_limited:1, /* cwnd not fully used? */ - unused:1; + __u32 is_app_limited:1, /* cwnd not fully used? */ + delivered_ce:20, + unused:11; /* pkts S/ACKed so far upon tx of skb, incl retrans: */ __u32 delivered; /* start of send pipeline phase */ - u64 first_tx_mstamp; + u32 first_tx_mstamp; /* when we reached the "delivered" count */ - u64 delivered_mstamp; + u32 delivered_mstamp; +#define TCPCB_IN_FLIGHT_BITS 20 +#define TCPCB_IN_FLIGHT_MAX ((1U << TCPCB_IN_FLIGHT_BITS) - 1) + u32 in_flight:20, /* packets in flight at transmit */ + unused2:12; + u32 lost; /* packets lost so far upon tx of skb */ } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -1024,7 +1035,11 @@ enum tcp_ca_ack_event_flags { #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ #define TCP_CONG_NEEDS_ECN 0x2 -#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN) +/* Wants notification of CE events (CA_EVENT_ECN_IS_CE, CA_EVENT_ECN_NO_CE). */ +#define TCP_CONG_WANTS_CE_EVENTS 0x4 +#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | \ + TCP_CONG_NEEDS_ECN | \ + TCP_CONG_WANTS_CE_EVENTS) union tcp_cc_info; @@ -1044,8 +1059,13 @@ struct ack_sample { */ struct rate_sample { u64 prior_mstamp; /* starting timestamp for interval */ + u32 prior_lost; /* tp->lost at "prior_mstamp" */ u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + u32 prior_delivered_ce;/* tp->delivered_ce at "prior_mstamp" */ + u32 tx_in_flight; /* packets in flight at starting timestamp */ + s32 lost; /* number of packets lost over interval */ s32 delivered; /* number of packets delivered over interval */ + s32 delivered_ce; /* packets delivered w/ CE mark over interval */ long interval_us; /* time for tp->delivered to incr "delivered" */ u32 snd_interval_us; /* snd interval for delivered packets */ u32 rcv_interval_us; /* rcv interval for delivered packets */ @@ -1056,6 +1076,7 @@ struct rate_sample { bool is_app_limited; /* is sample from packet with bubble in pipe? */ bool is_retrans; /* is sample from retransmission? */ bool is_ack_delayed; /* is this (likely) a delayed ACK? */ + bool is_ece; /* did this ACK have ECN marked? */ }; struct tcp_congestion_ops { @@ -1082,10 +1103,12 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); - /* override sysctl_tcp_min_tso_segs */ - u32 (*min_tso_segs)(struct sock *sk); + /* pick target number of segments per TSO/GSO skb (optional): */ + u32 (*tso_segs)(struct sock *sk, unsigned int mss_now); /* returns the multiplier used in tcp_sndbuf_expand (optional) */ u32 (*sndbuf_expand)(struct sock *sk); + /* react to a specific lost skb (optional) */ + void (*skb_marked_lost)(struct sock *sk, const struct sk_buff *skb); /* call when packets are delivered to update cwnd and pacing rate, * after all the ca_state processing. (optional) */ @@ -1131,6 +1154,14 @@ static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) } #endif +static inline bool tcp_ca_wants_ce_events(const struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + + return icsk->icsk_ca_ops->flags & (TCP_CONG_NEEDS_ECN | + TCP_CONG_WANTS_CE_EVENTS); +} + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -1156,6 +1187,7 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) } /* From tcp_rate.c */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, struct rate_sample *rs); @@ -1431,8 +1463,13 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied); */ static inline bool tcp_rmem_pressure(const struct sock *sk) { - int rcvbuf = READ_ONCE(sk->sk_rcvbuf); - int threshold = rcvbuf - (rcvbuf >> 3); + int rcvbuf, threshold; + + if (tcp_under_memory_pressure(sk)) + return true; + + rcvbuf = READ_ONCE(sk->sk_rcvbuf); + threshold = rcvbuf - (rcvbuf >> 3); return atomic_read(&sk->sk_rmem_alloc) > threshold; } diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h index 4e2d61e8fb1e..e6a43163ab5b 100644 --- a/include/scsi/libsas.h +++ b/include/scsi/libsas.h @@ -391,10 +391,6 @@ struct sas_ha_struct { int strict_wide_ports; /* both sas_addr and attached_sas_addr must match * their siblings when forming wide ports */ - /* LLDD calls these to notify the class of an event. */ - int (*notify_port_event)(struct asd_sas_phy *, enum port_event); - int (*notify_phy_event)(struct asd_sas_phy *, enum phy_event); - void *lldd_ha; /* not touched by sas class code */ struct list_head eh_done_q; /* complete via scsi_eh_flush_done_q */ @@ -706,4 +702,11 @@ struct sas_phy *sas_get_local_phy(struct domain_device *dev); int sas_request_addr(struct Scsi_Host *shost, u8 *addr); +int sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event); +int sas_notify_phy_event(struct asd_sas_phy *phy, enum phy_event event); +int sas_notify_port_event_gfp(struct asd_sas_phy *phy, enum port_event event, + gfp_t gfp_flags); +int sas_notify_phy_event_gfp(struct asd_sas_phy *phy, enum phy_event event, + gfp_t gfp_flags); + #endif /* _SASLIB_H_ */ diff --git a/include/sound/intel-nhlt.h b/include/sound/intel-nhlt.h index 743c2f442280..d0574805865f 100644 --- a/include/sound/intel-nhlt.h +++ b/include/sound/intel-nhlt.h @@ -112,6 +112,11 @@ struct nhlt_vendor_dmic_array_config { /* TODO add vendor mic config */ } __packed; +enum { + NHLT_CONFIG_TYPE_GENERIC = 0, + NHLT_CONFIG_TYPE_MIC_ARRAY = 1 +}; + enum { NHLT_MIC_ARRAY_2CH_SMALL = 0xa, NHLT_MIC_ARRAY_2CH_BIG = 0xb, diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 6336780d83a7..ce2fba49c95d 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -72,6 +72,7 @@ int transport_backend_register(const struct target_backend_ops *); void target_backend_unregister(const struct target_backend_ops *); void target_complete_cmd(struct se_cmd *, u8); +void target_set_cmd_data_length(struct se_cmd *, int); void target_complete_cmd_with_length(struct se_cmd *, u8, int); void transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *); diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 723c8e23ca87..5f42a14481bd 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -1036,9 +1036,9 @@ drm_fourcc_canonicalize_nvidia_format_mod(__u64 modifier) * Not all combinations are valid, and different SoCs may support different * combinations of layout and options. */ -#define __fourcc_mod_amlogic_layout_mask 0xf +#define __fourcc_mod_amlogic_layout_mask 0xff #define __fourcc_mod_amlogic_options_shift 8 -#define __fourcc_mod_amlogic_options_mask 0xf +#define __fourcc_mod_amlogic_options_mask 0xff #define DRM_FORMAT_MOD_AMLOGIC_FBC(__layout, __options) \ fourcc_mod_code(AMLOGIC, \ diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index b49fbf2bdc40..1c064627e6c3 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -414,15 +414,12 @@ enum drm_mode_subconnector { * * If the @count_modes field is set to zero, the kernel will perform a forced * probe on the connector to refresh the connector status, modes and EDID. - * A forced-probe can be slow and the ioctl will block. A force-probe can cause - * flickering and temporary freezes, so it should not be performed - * automatically. + * A forced-probe can be slow, might cause flickering and the ioctl will block. * - * User-space shouldn't need to force-probe connectors in general: the kernel - * will automatically take care of probing connectors that don't support - * hot-plug detection when appropriate. However, user-space may force-probe - * connectors on user request (e.g. clicking a "Scan connectors" button, or - * opening a UI to manage screens). + * User-space needs to force-probe connectors to ensure their metadata is + * up-to-date at startup and after receiving a hot-plug event. User-space + * may perform a forced-probe when the user explicitly requests it. User-space + * shouldn't perform a forced-probe in other situations. */ struct drm_mode_get_connector { /** @encoders_ptr: Pointer to ``__u32`` array of object IDs. */ diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 20ee93f0f876..96d52dd9c48a 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -231,9 +231,42 @@ struct tcp_bbr_info { __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ }; +/* Phase as reported in netlink/ss stats. */ +enum tcp_bbr2_phase { + BBR2_PHASE_INVALID = 0, + BBR2_PHASE_STARTUP = 1, + BBR2_PHASE_DRAIN = 2, + BBR2_PHASE_PROBE_RTT = 3, + BBR2_PHASE_PROBE_BW_UP = 4, + BBR2_PHASE_PROBE_BW_DOWN = 5, + BBR2_PHASE_PROBE_BW_CRUISE = 6, + BBR2_PHASE_PROBE_BW_REFILL = 7 +}; + +struct tcp_bbr2_info { + /* u64 bw: bandwidth (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lsb; /* lower 32 bits of bw */ + __u32 bbr_bw_msb; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ + __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ + __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ + __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ + __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ + __u8 bbr_mode; /* current bbr_mode in state machine */ + __u8 bbr_phase; /* current state machine phase */ + __u8 unused1; /* alignment padding; not used yet */ + __u8 bbr_version; /* MUST be at this offset in struct */ + __u32 bbr_inflight_lo; /* lower/short-term data volume bound */ + __u32 bbr_inflight_hi; /* higher/long-term data volume bound */ + __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; struct tcp_bbr_info bbr; + struct tcp_bbr2_info bbr2; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 30c80d5ba4bf..bab8c9708611 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -145,6 +145,7 @@ enum { L2TP_ATTR_RX_ERRORS, /* u64 */ L2TP_ATTR_STATS_PAD, L2TP_ATTR_RX_COOKIE_DISCARDS, /* u64 */ + L2TP_ATTR_RX_INVALID, /* u64 */ __L2TP_ATTR_STATS_MAX, }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cthelper.h b/include/uapi/linux/netfilter/nfnetlink_cthelper.h index a13137afc429..70af02092d16 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cthelper.h +++ b/include/uapi/linux/netfilter/nfnetlink_cthelper.h @@ -5,7 +5,7 @@ #define NFCT_HELPER_STATUS_DISABLED 0 #define NFCT_HELPER_STATUS_ENABLED 1 -enum nfnl_acct_msg_types { +enum nfnl_cthelper_msg_types { NFNL_MSG_CTHELPER_NEW, NFNL_MSG_CTHELPER_GET, NFNL_MSG_CTHELPER_DEL, diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index ee95f42fb0ec..88f4bf0047e7 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -591,6 +591,8 @@ enum { TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED = 1 << 1, /* Part of an existing connection. */ TCA_FLOWER_KEY_CT_FLAGS_RELATED = 1 << 2, /* Related to an established connection. */ TCA_FLOWER_KEY_CT_FLAGS_TRACKED = 1 << 3, /* Conntrack has occurred. */ + + __TCA_FLOWER_KEY_CT_FLAGS_MAX, }; enum { diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h index aea26ab1431c..bff5032c98df 100644 --- a/include/uapi/linux/psample.h +++ b/include/uapi/linux/psample.h @@ -3,7 +3,6 @@ #define __UAPI_PSAMPLE_H enum { - /* sampled packet metadata */ PSAMPLE_ATTR_IIFINDEX, PSAMPLE_ATTR_OIFINDEX, PSAMPLE_ATTR_ORIGSIZE, @@ -11,10 +10,8 @@ enum { PSAMPLE_ATTR_GROUP_SEQ, PSAMPLE_ATTR_SAMPLE_RATE, PSAMPLE_ATTR_DATA, - PSAMPLE_ATTR_TUNNEL, - - /* commands attributes */ PSAMPLE_ATTR_GROUP_REFCOUNT, + PSAMPLE_ATTR_TUNNEL, __PSAMPLE_ATTR_MAX }; diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index b9c937b3a149..0b1182a3cf41 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -157,6 +157,7 @@ gnttab_set_map_op(struct gnttab_map_grant_ref *map, phys_addr_t addr, map->flags = flags; map->ref = ref; map->dom = domid; + map->status = 1; /* arbitrary positive value */ } static inline void diff --git a/init/Kconfig b/init/Kconfig index 29ad68325028..c14c63251da8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1172,6 +1172,22 @@ config USER_NS If unsure, say N. +config USER_NS_UNPRIVILEGED + bool "Allow unprivileged users to create namespaces" + default y + depends on USER_NS + help + When disabled, unprivileged users will not be able to create + new namespaces. Allowing users to create their own namespaces + has been part of several recent local privilege escalation + exploits, so if you need user namespaces but are + paranoid^Wsecurity-conscious you want to disable this. + + This setting can be overridden at runtime via the + kernel.unprivileged_userns_clone sysctl. + + If unsure, say Y. + config PID_NS bool "PID Namespaces" default y @@ -1193,6 +1209,7 @@ endif # NAMESPACES config CHECKPOINT_RESTORE bool "Checkpoint/restore support" select PROC_CHILDREN + select KCMP default n help Enables additional kernel features in a sake of checkpoint/restore. @@ -1310,7 +1327,6 @@ config CC_OPTIMIZE_FOR_PERFORMANCE config CC_OPTIMIZE_FOR_PERFORMANCE_O3 bool "Optimize more for performance (-O3)" - depends on ARC help Choosing this option will pass "-O3" to your compiler to optimize the kernel yet more for performance. @@ -1736,6 +1752,16 @@ config ARCH_HAS_MEMBARRIER_CALLBACKS config ARCH_HAS_MEMBARRIER_SYNC_CORE bool +config KCMP + bool "Enable kcmp() system call" if EXPERT + help + Enable the kernel resource comparison system call. It provides + user-space with the ability to compare two processes to see if they + share a common resource, such as a file descriptor or even virtual + memory space. + + If unsure, say N. + config RSEQ bool "Enable rseq() system call" if EXPERT default y @@ -2215,8 +2241,8 @@ config MODULE_COMPRESS bool "Compress modules on installation" help - Compresses kernel modules when 'make modules_install' is run; gzip or - xz depending on "Compression algorithm" below. + Compresses kernel modules when 'make modules_install' is run; gzip, + xz, or zstd depending on "Compression algorithm" below. module-init-tools MAY support gzip, and kmod MAY support gzip and xz. @@ -2238,7 +2264,7 @@ choice This determines which sort of compression will be used during 'make modules_install'. - GZIP (default) and XZ are supported. + GZIP (default), XZ, and ZSTD are supported. config MODULE_COMPRESS_GZIP bool "GZIP" @@ -2246,6 +2272,9 @@ config MODULE_COMPRESS_GZIP config MODULE_COMPRESS_XZ bool "XZ" +config MODULE_COMPRESS_ZSTD + bool "ZSTD" + endchoice config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS diff --git a/init/main.c b/init/main.c index a626e78dbf06..aeef291bf28d 100644 --- a/init/main.c +++ b/init/main.c @@ -1423,6 +1423,7 @@ static int __ref kernel_init(void *unused) async_synchronize_full(); kprobe_free_init_mem(); ftrace_free_init_mem(); + kgdb_free_init_mem(); free_initmem(); mark_readonly(); diff --git a/kernel/Makefile b/kernel/Makefile index aa7368c7eabf..320f1f3941b7 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -51,7 +51,7 @@ obj-y += livepatch/ obj-y += dma/ obj-y += entry/ -obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o +obj-$(CONFIG_KCMP) += kcmp.o obj-$(CONFIG_FREEZER) += freezer.o obj-$(CONFIG_PROFILING) += profile.o obj-$(CONFIG_STACKTRACE) += stacktrace.o diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 6639640523c0..b58b2efb9b43 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -109,7 +109,7 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) fd = *(int *)key; f = fget_raw(fd); if (!f) - return NULL; + return ERR_PTR(-EBADF); sdata = inode_storage_lookup(f->f_inode, map, true); fput(f); diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c index 5454161407f1..a0d9eade9c80 100644 --- a/kernel/bpf/bpf_iter.c +++ b/kernel/bpf/bpf_iter.c @@ -287,7 +287,7 @@ int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; - tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); + tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) return -ENOMEM; diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c index 1b6b9349cb85..d99e89f113c4 100644 --- a/kernel/bpf/bpf_lru_list.c +++ b/kernel/bpf/bpf_lru_list.c @@ -502,13 +502,14 @@ struct bpf_lru_node *bpf_lru_pop_free(struct bpf_lru *lru, u32 hash) static void bpf_common_lru_push_free(struct bpf_lru *lru, struct bpf_lru_node *node) { + u8 node_type = READ_ONCE(node->type); unsigned long flags; - if (WARN_ON_ONCE(node->type == BPF_LRU_LIST_T_FREE) || - WARN_ON_ONCE(node->type == BPF_LRU_LOCAL_LIST_T_FREE)) + if (WARN_ON_ONCE(node_type == BPF_LRU_LIST_T_FREE) || + WARN_ON_ONCE(node_type == BPF_LRU_LOCAL_LIST_T_FREE)) return; - if (node->type == BPF_LRU_LOCAL_LIST_T_PENDING) { + if (node_type == BPF_LRU_LOCAL_LIST_T_PENDING) { struct bpf_lru_locallist *loc_l; loc_l = per_cpu_ptr(lru->common_lru.local_list, node->cpu); diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 1a666a975416..70f6fd4fa305 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -430,7 +430,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key, tprogs[BPF_TRAMP_FENTRY].progs[0] = prog; tprogs[BPF_TRAMP_FENTRY].nr_progs = 1; - err = arch_prepare_bpf_trampoline(image, + err = arch_prepare_bpf_trampoline(NULL, image, st_map->image + PAGE_SIZE, &st_ops->func_models[i], 0, tprogs, NULL); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 261f8692d0d2..1de87fcaeabd 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -817,7 +817,7 @@ static int __init bpf_jit_charge_init(void) } pure_initcall(bpf_jit_charge_init); -static int bpf_jit_charge_modmem(u32 pages) +int bpf_jit_charge_modmem(u32 pages) { if (atomic_long_add_return(pages, &bpf_jit_current) > (bpf_jit_limit >> PAGE_SHIFT)) { @@ -830,7 +830,7 @@ static int bpf_jit_charge_modmem(u32 pages) return 0; } -static void bpf_jit_uncharge_modmem(u32 pages) +void bpf_jit_uncharge_modmem(u32 pages) { atomic_long_sub(pages, &bpf_jit_current); } diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index f6e9c68afdd4..85d9d1b72a33 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -802,9 +802,7 @@ static int dev_map_notification(struct notifier_block *notifier, break; /* will be freed in free_netdev() */ - netdev->xdp_bulkq = - __alloc_percpu_gfp(sizeof(struct xdp_dev_bulk_queue), - sizeof(void *), GFP_ATOMIC); + netdev->xdp_bulkq = alloc_percpu(struct xdp_dev_bulk_queue); if (!netdev->xdp_bulkq) return NOTIFY_BAD; diff --git a/kernel/bpf/preload/bpf_preload_kern.c b/kernel/bpf/preload/bpf_preload_kern.c index 79c5772465f1..53736e52c1df 100644 --- a/kernel/bpf/preload/bpf_preload_kern.c +++ b/kernel/bpf/preload/bpf_preload_kern.c @@ -60,9 +60,12 @@ static int finish(void) &magic, sizeof(magic), &pos); if (n != sizeof(magic)) return -EPIPE; + tgid = umd_ops.info.tgid; - wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); - umd_ops.info.tgid = NULL; + if (tgid) { + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + umd_cleanup_helper(&umd_ops.info); + } return 0; } @@ -80,10 +83,18 @@ static int __init load_umd(void) static void __exit fini_umd(void) { + struct pid *tgid; + bpf_preload_ops = NULL; + /* kill UMD in case it's still there due to earlier error */ - kill_pid(umd_ops.info.tgid, SIGKILL, 1); - umd_ops.info.tgid = NULL; + tgid = umd_ops.info.tgid; + if (tgid) { + kill_pid(tgid, SIGKILL, 1); + + wait_event(tgid->wait_pidfd, thread_group_exited(tgid)); + umd_cleanup_helper(&umd_ops.info); + } umd_unload_blob(&umd_ops.info); } late_initcall(load_umd); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e5999d86c76e..32ca33539052 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -854,6 +854,11 @@ static int map_create(union bpf_attr *attr) err = PTR_ERR(btf); goto free_map; } + if (btf_is_kernel(btf)) { + btf_put(btf); + err = -EACCES; + goto free_map; + } map->btf = btf; if (attr->btf_value_type_id) { diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 35c5887d82ff..986dabc3d11f 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -57,19 +57,10 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym) PAGE_SIZE, true, ksym->name); } -static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr) -{ - struct bpf_ksym *ksym = &tr->ksym; - - snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key); - bpf_image_ksym_add(tr->image, ksym); -} - static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { struct bpf_trampoline *tr; struct hlist_head *head; - void *image; int i; mutex_lock(&trampoline_mutex); @@ -84,14 +75,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) if (!tr) goto out; - /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ - image = bpf_jit_alloc_exec_page(); - if (!image) { - kfree(tr); - tr = NULL; - goto out; - } - tr->key = key; INIT_HLIST_NODE(&tr->hlist); hlist_add_head(&tr->hlist, head); @@ -99,9 +82,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key) mutex_init(&tr->mutex); for (i = 0; i < BPF_TRAMP_MAX; i++) INIT_HLIST_HEAD(&tr->progs_hlist[i]); - tr->image = image; - INIT_LIST_HEAD_RCU(&tr->ksym.lnode); - bpf_trampoline_ksym_add(tr); out: mutex_unlock(&trampoline_mutex); return tr; @@ -185,10 +165,142 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total) return tprogs; } +static void __bpf_tramp_image_put_deferred(struct work_struct *work) +{ + struct bpf_tramp_image *im; + + im = container_of(work, struct bpf_tramp_image, work); + bpf_image_ksym_del(&im->ksym); + bpf_jit_free_exec(im->image); + bpf_jit_uncharge_modmem(1); + percpu_ref_exit(&im->pcref); + kfree_rcu(im, rcu); +} + +/* callback, fexit step 3 or fentry step 2 */ +static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu) +{ + struct bpf_tramp_image *im; + + im = container_of(rcu, struct bpf_tramp_image, rcu); + INIT_WORK(&im->work, __bpf_tramp_image_put_deferred); + schedule_work(&im->work); +} + +/* callback, fexit step 2. Called after percpu_ref_kill confirms. */ +static void __bpf_tramp_image_release(struct percpu_ref *pcref) +{ + struct bpf_tramp_image *im; + + im = container_of(pcref, struct bpf_tramp_image, pcref); + call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); +} + +/* callback, fexit or fentry step 1 */ +static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu) +{ + struct bpf_tramp_image *im; + + im = container_of(rcu, struct bpf_tramp_image, rcu); + if (im->ip_after_call) + /* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */ + percpu_ref_kill(&im->pcref); + else + /* the case of fentry trampoline */ + call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu); +} + +static void bpf_tramp_image_put(struct bpf_tramp_image *im) +{ + /* The trampoline image that calls original function is using: + * rcu_read_lock_trace to protect sleepable bpf progs + * rcu_read_lock to protect normal bpf progs + * percpu_ref to protect trampoline itself + * rcu tasks to protect trampoline asm not covered by percpu_ref + * (which are few asm insns before __bpf_tramp_enter and + * after __bpf_tramp_exit) + * + * The trampoline is unreachable before bpf_tramp_image_put(). + * + * First, patch the trampoline to avoid calling into fexit progs. + * The progs will be freed even if the original function is still + * executing or sleeping. + * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on + * first few asm instructions to execute and call into + * __bpf_tramp_enter->percpu_ref_get. + * Then use percpu_ref_kill to wait for the trampoline and the original + * function to finish. + * Then use call_rcu_tasks() to make sure few asm insns in + * the trampoline epilogue are done as well. + * + * In !PREEMPT case the task that got interrupted in the first asm + * insns won't go through an RCU quiescent state which the + * percpu_ref_kill will be waiting for. Hence the first + * call_rcu_tasks() is not necessary. + */ + if (im->ip_after_call) { + int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP, + NULL, im->ip_epilogue); + WARN_ON(err); + if (IS_ENABLED(CONFIG_PREEMPTION)) + call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks); + else + percpu_ref_kill(&im->pcref); + return; + } + + /* The trampoline without fexit and fmod_ret progs doesn't call original + * function and doesn't use percpu_ref. + * Use call_rcu_tasks_trace() to wait for sleepable progs to finish. + * Then use call_rcu_tasks() to wait for the rest of trampoline asm + * and normal progs. + */ + call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks); +} + +static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx) +{ + struct bpf_tramp_image *im; + struct bpf_ksym *ksym; + void *image; + int err = -ENOMEM; + + im = kzalloc(sizeof(*im), GFP_KERNEL); + if (!im) + goto out; + + err = bpf_jit_charge_modmem(1); + if (err) + goto out_free_im; + + err = -ENOMEM; + im->image = image = bpf_jit_alloc_exec_page(); + if (!image) + goto out_uncharge; + + err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL); + if (err) + goto out_free_image; + + ksym = &im->ksym; + INIT_LIST_HEAD_RCU(&ksym->lnode); + snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx); + bpf_image_ksym_add(image, ksym); + return im; + +out_free_image: + bpf_jit_free_exec(im->image); +out_uncharge: + bpf_jit_uncharge_modmem(1); +out_free_im: + kfree(im); +out: + return ERR_PTR(err); +} + static int bpf_trampoline_update(struct bpf_trampoline *tr) { - void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; - void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; + struct bpf_tramp_image *im; struct bpf_tramp_progs *tprogs; u32 flags = BPF_TRAMP_F_RESTORE_REGS; int err, total; @@ -198,41 +310,42 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) return PTR_ERR(tprogs); if (total == 0) { - err = unregister_fentry(tr, old_image); + err = unregister_fentry(tr, tr->cur_image->image); + bpf_tramp_image_put(tr->cur_image); + tr->cur_image = NULL; tr->selector = 0; goto out; } + im = bpf_tramp_image_alloc(tr->key, tr->selector); + if (IS_ERR(im)) { + err = PTR_ERR(im); + goto out; + } + if (tprogs[BPF_TRAMP_FEXIT].nr_progs || tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; - /* Though the second half of trampoline page is unused a task could be - * preempted in the middle of the first half of trampoline and two - * updates to trampoline would change the code from underneath the - * preempted task. Hence wait for tasks to voluntarily schedule or go - * to userspace. - * The same trampoline can hold both sleepable and non-sleepable progs. - * synchronize_rcu_tasks_trace() is needed to make sure all sleepable - * programs finish executing. - * Wait for these two grace periods together. - */ - synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace); - - err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2, + err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE, &tr->func.model, flags, tprogs, tr->func.addr); if (err < 0) goto out; - if (tr->selector) + WARN_ON(tr->cur_image && tr->selector == 0); + WARN_ON(!tr->cur_image && tr->selector); + if (tr->cur_image) /* progs already running at this address */ - err = modify_fentry(tr, old_image, new_image); + err = modify_fentry(tr, tr->cur_image->image, im->image); else /* first time registering */ - err = register_fentry(tr, new_image); + err = register_fentry(tr, im->image); if (err) goto out; + if (tr->cur_image) + bpf_tramp_image_put(tr->cur_image); + tr->cur_image = im; tr->selector++; out: kfree(tprogs); @@ -364,17 +477,12 @@ void bpf_trampoline_put(struct bpf_trampoline *tr) goto out; if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) goto out; - bpf_image_ksym_del(&tr->ksym); - /* This code will be executed when all bpf progs (both sleepable and - * non-sleepable) went through - * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred(). - * Hence no need for another synchronize_rcu_tasks_trace() here, - * but synchronize_rcu_tasks() is still needed, since trampoline - * may not have had any sleepable programs and we need to wait - * for tasks to get out of trampoline code before freeing it. + /* This code will be executed even when the last bpf_tramp_image + * is alive. All progs are detached from the trampoline and the + * trampoline image is patched with jmp into epilogue to skip + * fexit progs. The fentry-only trampoline will be freed via + * multiple rcu callbacks. */ - synchronize_rcu_tasks(); - bpf_jit_free_exec(tr->image); hlist_del(&tr->hlist); kfree(tr); out: @@ -433,8 +541,18 @@ void notrace __bpf_prog_exit_sleepable(void) rcu_read_unlock_trace(); } +void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) +{ + percpu_ref_get(&tr->pcref); +} + +void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr) +{ + percpu_ref_put(&tr->pcref); +} + int __weak -arch_prepare_bpf_trampoline(void *image, void *image_end, +arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 37581919e050..5b233e911c2c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4834,8 +4834,9 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, subprog); clear_caller_saved_regs(env, caller->regs); - /* All global functions return SCALAR_VALUE */ + /* All global functions return a 64-bit SCALAR_VALUE */ mark_reg_unknown(env, caller->regs, BPF_REG_0); + caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; /* continue with next insn after call */ return 0; @@ -5388,10 +5389,14 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, { bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg); - u32 off; + u32 off, max; switch (ptr_reg->type) { case PTR_TO_STACK: + /* Offset 0 is out-of-bounds, but acceptable start for the + * left direction, see BPF_REG_FP. + */ + max = MAX_BPF_STACK + mask_to_left; /* Indirect variable offset stack access is prohibited in * unprivileged mode so it's not handled here. */ @@ -5399,16 +5404,17 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, if (mask_to_left) *ptr_limit = MAX_BPF_STACK + off; else - *ptr_limit = -off; - return 0; + *ptr_limit = -off - 1; + return *ptr_limit >= max ? -ERANGE : 0; case PTR_TO_MAP_VALUE: + max = ptr_reg->map_ptr->value_size; if (mask_to_left) { *ptr_limit = ptr_reg->umax_value + ptr_reg->off; } else { off = ptr_reg->smin_value + ptr_reg->off; - *ptr_limit = ptr_reg->map_ptr->value_size - off; + *ptr_limit = ptr_reg->map_ptr->value_size - off - 1; } - return 0; + return *ptr_limit >= max ? -ERANGE : 0; default: return -EINVAL; } @@ -5461,6 +5467,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, u32 alu_state, alu_limit; struct bpf_reg_state tmp; bool ret; + int err; if (can_skip_alu_sanitation(env, insn)) return 0; @@ -5476,10 +5483,13 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; - if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) - return 0; - if (update_alu_sanitation_state(aux, alu_state, alu_limit)) - return -EACCES; + err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg); + if (err < 0) + return err; + + err = update_alu_sanitation_state(aux, alu_state, alu_limit); + if (err < 0) + return err; do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@ -5595,7 +5605,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_ADD: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to add from different maps or paths\n", dst); + verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst); return ret; } /* We can take a fixed offset as long as it doesn't overflow @@ -5650,7 +5660,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, case BPF_SUB: ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); if (ret < 0) { - verbose(env, "R%d tried to sub from different maps or paths\n", dst); + verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst); return ret; } if (dst_reg == off_reg) { @@ -8570,6 +8580,10 @@ static int check_btf_info(struct bpf_verifier_env *env, btf = btf_get_by_fd(attr->prog_btf_fd); if (IS_ERR(btf)) return PTR_ERR(btf); + if (btf_is_kernel(btf)) { + btf_put(btf); + return -EACCES; + } env->prog->aux->btf = btf; err = check_btf_func(env, attr, uattr); @@ -11006,7 +11020,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) bool isdiv = BPF_OP(insn->code) == BPF_DIV; struct bpf_insn *patchlet; struct bpf_insn chk_and_div[] = { - /* Rx div 0 -> 0 */ + /* [R,W]x div 0 -> 0 */ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JNE | BPF_K, insn->src_reg, 0, 2, 0), @@ -11015,16 +11029,18 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) *insn, }; struct bpf_insn chk_and_mod[] = { - /* Rx mod 0 -> Rx */ + /* [R,W]x mod 0 -> [R,W]x */ BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JEQ | BPF_K, insn->src_reg, - 0, 1, 0), + 0, 1 + (is64 ? 0 : 1), 0), *insn, + BPF_JMP_IMM(BPF_JA, 0, 0, 1), + BPF_MOV32_REG(insn->dst_reg, insn->dst_reg), }; patchlet = isdiv ? chk_and_div : chk_and_mod; cnt = isdiv ? ARRAY_SIZE(chk_and_div) : - ARRAY_SIZE(chk_and_mod); + ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0); new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt); if (!new_prog) @@ -11076,7 +11092,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) off_reg = issrc ? insn->src_reg : insn->dst_reg; if (isneg) *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); - *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit); *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index af6e8b4fb359..c0bb31e683e9 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -456,6 +456,17 @@ int dbg_remove_all_break(void) return 0; } +void kgdb_free_init_mem(void) +{ + int i; + + /* Clear init memory breakpoints. */ + for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) { + if (init_section_contains((void *)kgdb_break[i].bpt_addr, 0)) + kgdb_break[i].state = BP_UNDEFINED; + } +} + #ifdef CONFIG_KGDB_KDB void kdb_dump_stack_on_cpu(int cpu) { diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h index a4281fb99299..81874213b0fe 100644 --- a/kernel/debug/kdb/kdb_private.h +++ b/kernel/debug/kdb/kdb_private.h @@ -230,7 +230,7 @@ extern struct task_struct *kdb_curr_task(int); #define kdb_task_has_cpu(p) (task_curr(p)) -#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL) +#define GFP_KDB (in_dbg_master() ? GFP_ATOMIC : GFP_KERNEL) extern void *debug_kmalloc(size_t size, gfp_t flags); extern void debug_kfree(void *); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index f9d491b17b78..1ef9b15ceec9 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -184,6 +184,10 @@ static unsigned long exit_to_user_mode_loop(struct pt_regs *regs, * enabled above. */ local_irq_disable_exit_to_user(); + + /* Check if any of the above work has queued a deferred wakeup */ + rcu_nocb_flush_deferred_wakeup(); + ti_work = READ_ONCE(current_thread_info()->flags); } @@ -197,6 +201,9 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) lockdep_assert_irqs_disabled(); + /* Flush pending rcuog wakeup before the last need_resched() check */ + rcu_nocb_flush_deferred_wakeup(); + if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) ti_work = exit_to_user_mode_loop(regs, ti_work); diff --git a/kernel/events/core.c b/kernel/events/core.c index 55d18791a72d..8425dbc1d239 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -385,6 +385,7 @@ static DEFINE_MUTEX(perf_sched_mutex); static atomic_t perf_sched_count; static DEFINE_PER_CPU(atomic_t, perf_cgroup_events); +static DEFINE_PER_CPU(int, perf_sched_cb_usages); static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events); static atomic_t nr_mmap_events __read_mostly; @@ -3474,11 +3475,16 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, } } +static DEFINE_PER_CPU(struct list_head, sched_cb_list); + void perf_sched_cb_dec(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - --cpuctx->sched_cb_usage; + this_cpu_dec(perf_sched_cb_usages); + + if (!--cpuctx->sched_cb_usage) + list_del(&cpuctx->sched_cb_entry); } @@ -3486,7 +3492,10 @@ void perf_sched_cb_inc(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - cpuctx->sched_cb_usage++; + if (!cpuctx->sched_cb_usage++) + list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list)); + + this_cpu_inc(perf_sched_cb_usages); } /* @@ -3515,6 +3524,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } +static void perf_pmu_sched_task(struct task_struct *prev, + struct task_struct *next, + bool sched_in) +{ + struct perf_cpu_context *cpuctx; + + if (prev == next) + return; + + list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) { + /* will be handled in perf_event_context_sched_in/out */ + if (cpuctx->task_ctx) + continue; + + __perf_pmu_sched_task(cpuctx, sched_in); + } +} + static void perf_event_switch(struct task_struct *task, struct task_struct *next_prev, bool sched_in); @@ -3537,6 +3564,9 @@ void __perf_event_task_sched_out(struct task_struct *task, { int ctxn; + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(task, next, false); + if (atomic_read(&nr_switch_events)) perf_event_switch(task, next, false); @@ -3845,6 +3875,9 @@ void __perf_event_task_sched_in(struct task_struct *prev, if (atomic_read(&nr_switch_events)) perf_event_switch(task, prev, true); + + if (__this_cpu_read(perf_sched_cb_usages)) + perf_pmu_sched_task(prev, task, true); } static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count) @@ -4669,7 +4702,7 @@ static void unaccount_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) dec = true; if (event->attr.mmap || event->attr.mmap_data) atomic_dec(&nr_mmap_events); @@ -11168,7 +11201,7 @@ static void account_event(struct perf_event *event) if (event->parent) return; - if (event->attach_state & PERF_ATTACH_TASK) + if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB)) inc = true; if (event->attr.mmap || event->attr.mmap_data) atomic_inc(&nr_mmap_events); @@ -12960,6 +12993,7 @@ static void __init perf_event_init_all_cpus(void) #ifdef CONFIG_CGROUP_PERF INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu)); #endif + INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu)); } } diff --git a/kernel/fork.c b/kernel/fork.c index d66cd1014211..09658cb5317e 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -97,6 +97,10 @@ #include #include +#ifdef CONFIG_USER_NS +#include +#endif + #include #include #include @@ -994,6 +998,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p) #endif } +static void mm_init_pasid(struct mm_struct *mm) +{ +#ifdef CONFIG_IOMMU_SUPPORT + mm->pasid = INIT_PASID; +#endif +} + static void mm_init_uprobes_state(struct mm_struct *mm) { #ifdef CONFIG_UPROBES @@ -1024,6 +1035,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, mm_init_cpumask(mm); mm_init_aio(mm); mm_init_owner(mm, p); + mm_init_pasid(mm); RCU_INIT_POINTER(mm->exe_file, NULL); mmu_notifier_subscriptions_init(mm); init_tlb_flush_pending(mm); @@ -1864,6 +1876,10 @@ static __latent_entropy struct task_struct *copy_process( if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS)) return ERR_PTR(-EINVAL); + if ((clone_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + /* * Thread groups must share signals as well, and detached threads * can only be started up within the thread group. @@ -2933,6 +2949,12 @@ int ksys_unshare(unsigned long unshare_flags) if (unshare_flags & CLONE_NEWNS) unshare_flags |= CLONE_FS; + if ((unshare_flags & CLONE_NEWUSER) && !unprivileged_userns_clone) { + err = -EPERM; + if (!capable(CAP_SYS_ADMIN)) + goto bad_unshare_out; + } + err = check_unshare_flags(unshare_flags); if (err) goto bad_unshare_out; diff --git a/kernel/futex.c b/kernel/futex.c index 45a13eb8894e..ab3df9e86a1f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2728,14 +2728,13 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, goto out; restart = ¤t->restart_block; - restart->fn = futex_wait_restart; restart->futex.uaddr = uaddr; restart->futex.val = val; restart->futex.time = *abs_time; restart->futex.bitset = bitset; restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; - ret = -ERESTART_RESTARTBLOCK; + ret = set_restart_fn(restart, futex_wait_restart); out: if (to) { diff --git a/kernel/gcov/clang.c b/kernel/gcov/clang.c index c94b820a1b62..8743150db2ac 100644 --- a/kernel/gcov/clang.c +++ b/kernel/gcov/clang.c @@ -75,7 +75,9 @@ struct gcov_fn_info { u32 num_counters; u64 *counters; +#if CONFIG_CLANG_VERSION < 110000 const char *function_name; +#endif }; static struct gcov_info *current_info; @@ -105,6 +107,7 @@ void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush) } EXPORT_SYMBOL(llvm_gcov_init); +#if CONFIG_CLANG_VERSION < 110000 void llvm_gcda_start_file(const char *orig_filename, const char version[4], u32 checksum) { @@ -113,7 +116,17 @@ void llvm_gcda_start_file(const char *orig_filename, const char version[4], current_info->checksum = checksum; } EXPORT_SYMBOL(llvm_gcda_start_file); +#else +void llvm_gcda_start_file(const char *orig_filename, u32 version, u32 checksum) +{ + current_info->filename = orig_filename; + current_info->version = version; + current_info->checksum = checksum; +} +EXPORT_SYMBOL(llvm_gcda_start_file); +#endif +#if CONFIG_CLANG_VERSION < 110000 void llvm_gcda_emit_function(u32 ident, const char *function_name, u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum) { @@ -133,6 +146,24 @@ void llvm_gcda_emit_function(u32 ident, const char *function_name, list_add_tail(&info->head, ¤t_info->functions); } EXPORT_SYMBOL(llvm_gcda_emit_function); +#else +void llvm_gcda_emit_function(u32 ident, u32 func_checksum, + u8 use_extra_checksum, u32 cfg_checksum) +{ + struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL); + + if (!info) + return; + + INIT_LIST_HEAD(&info->head); + info->ident = ident; + info->checksum = func_checksum; + info->use_extra_checksum = use_extra_checksum; + info->cfg_checksum = cfg_checksum; + list_add_tail(&info->head, ¤t_info->functions); +} +EXPORT_SYMBOL(llvm_gcda_emit_function); +#endif void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters) { @@ -295,6 +326,7 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src) } } +#if CONFIG_CLANG_VERSION < 110000 static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) { size_t cv_size; /* counter values size */ @@ -322,6 +354,28 @@ static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) kfree(fn_dup); return NULL; } +#else +static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn) +{ + size_t cv_size; /* counter values size */ + struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn), + GFP_KERNEL); + if (!fn_dup) + return NULL; + INIT_LIST_HEAD(&fn_dup->head); + + cv_size = fn->num_counters * sizeof(fn->counters[0]); + fn_dup->counters = vmalloc(cv_size); + if (!fn_dup->counters) { + kfree(fn_dup); + return NULL; + } + + memcpy(fn_dup->counters, fn->counters, cv_size); + + return fn_dup; +} +#endif /** * gcov_info_dup - duplicate profiling data set @@ -362,6 +416,7 @@ struct gcov_info *gcov_info_dup(struct gcov_info *info) * gcov_info_free - release memory for profiling data set duplicate * @info: profiling data set duplicate to free */ +#if CONFIG_CLANG_VERSION < 110000 void gcov_info_free(struct gcov_info *info) { struct gcov_fn_info *fn, *tmp; @@ -375,6 +430,20 @@ void gcov_info_free(struct gcov_info *info) kfree(info->filename); kfree(info); } +#else +void gcov_info_free(struct gcov_info *info) +{ + struct gcov_fn_info *fn, *tmp; + + list_for_each_entry_safe(fn, tmp, &info->functions, head) { + vfree(fn->counters); + list_del(&fn->head); + kfree(fn); + } + kfree(info->filename); + kfree(info); +} +#endif #define ITER_STRIDE PAGE_SIZE diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index cc1a09406c6e..6c2a75eb67bd 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -654,6 +654,27 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +/** + * generic_dispatch_irq - Dispatch an interrupt from an interrupt handler + * @irq: The irq number to handle + * + * A wrapper around generic_handle_irq() which ensures that interrupts are + * disabled when the primary handler of the dispatched irq is invoked. + * This is useful for interrupt handlers with dispatching to be safe for + * the forced threaded case. + */ +int generic_dispatch_irq(unsigned int irq) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = generic_handle_irq(irq); + local_irq_restore(flags); + return ret; +} +EXPORT_SYMBOL_GPL(generic_dispatch_irq); + #ifdef CONFIG_HANDLE_DOMAIN_IRQ /** * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index dec3f73e8db9..21ea370fccda 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1142,11 +1142,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action) irqreturn_t ret; local_bh_disable(); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_disable(); ret = action->thread_fn(action->irq, action->dev_id); if (ret == IRQ_HANDLED) atomic_inc(&desc->threads_handled); irq_finalize_oneshot(desc, action); + if (!IS_ENABLED(CONFIG_PREEMPT_RT)) + local_irq_enable(); local_bh_enable(); return ret; } diff --git a/kernel/jump_label.c b/kernel/jump_label.c index c6a39d662935..ba39fbb1f8e7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init) return false; if (!kernel_text_address(jump_entry_code(entry))) { + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ WARN_ONCE(!jump_entry_is_init(entry), "can't patch jump_label at %pS", (void *)jump_entry_code(entry)); diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 3994a217bde7..3bf98db9c702 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -101,7 +100,7 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; static DEFINE_PER_CPU(long, kcsan_skip); /* For kcsan_prandom_u32_max(). */ -static DEFINE_PER_CPU(struct rnd_state, kcsan_rand_state); +static DEFINE_PER_CPU(u32, kcsan_rand_state); static __always_inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, @@ -275,20 +274,17 @@ should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx * } /* - * Returns a pseudo-random number in interval [0, ep_ro). See prandom_u32_max() - * for more details. - * - * The open-coded version here is using only safe primitives for all contexts - * where we can have KCSAN instrumentation. In particular, we cannot use - * prandom_u32() directly, as its tracepoint could cause recursion. + * Returns a pseudo-random number in interval [0, ep_ro). Simple linear + * congruential generator, using constants from "Numerical Recipes". */ static u32 kcsan_prandom_u32_max(u32 ep_ro) { - struct rnd_state *state = &get_cpu_var(kcsan_rand_state); - const u32 res = prandom_u32_state(state); + u32 state = this_cpu_read(kcsan_rand_state); + + state = 1664525 * state + 1013904223; + this_cpu_write(kcsan_rand_state, state); - put_cpu_var(kcsan_rand_state); - return (u32)(((u64) res * ep_ro) >> 32); + return state % ep_ro; } static inline void reset_kcsan_skip(void) @@ -639,10 +635,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, void __init kcsan_init(void) { + int cpu; + BUG_ON(!in_task()); kcsan_debugfs_init(); - prandom_seed_full_state(&kcsan_rand_state); + + for_each_possible_cpu(cpu) + per_cpu(kcsan_rand_state, cpu) = (u32)get_cycles(); /* * We are in the init task, and no other tasks should be running; diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b02086d70492..5c3447cf7ad5 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -166,6 +166,11 @@ void kimage_file_post_load_cleanup(struct kimage *image) vfree(pi->sechdrs); pi->sechdrs = NULL; +#ifdef CONFIG_IMA_KEXEC + vfree(image->ima_buffer); + image->ima_buffer = NULL; +#endif /* CONFIG_IMA_KEXEC */ + /* See if architecture has anything to cleanup post load */ arch_kimage_file_post_load_cleanup(image); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index d5a3eb74a657..779d8322e307 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -861,7 +861,6 @@ static void try_to_optimize_kprobe(struct kprobe *p) cpus_read_unlock(); } -#ifdef CONFIG_SYSCTL static void optimize_all_kprobes(void) { struct hlist_head *head; @@ -887,6 +886,7 @@ static void optimize_all_kprobes(void) mutex_unlock(&kprobe_mutex); } +#ifdef CONFIG_SYSCTL static void unoptimize_all_kprobes(void) { struct hlist_head *head; @@ -2497,18 +2497,14 @@ static int __init init_kprobes(void) } } -#if defined(CONFIG_OPTPROBES) -#if defined(__ARCH_WANT_KPROBES_INSN_SLOT) - /* Init kprobe_optinsn_slots */ - kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; -#endif - /* By default, kprobes can be optimized */ - kprobes_allow_optimization = true; -#endif - /* By default, kprobes are armed */ kprobes_all_disarmed = false; +#if defined(CONFIG_OPTPROBES) && defined(__ARCH_WANT_KPROBES_INSN_SLOT) + /* Init kprobe_optinsn_slots for allocation */ + kprobe_optinsn_slots.insn_size = MAX_OPTINSN_SIZE; +#endif + err = arch_init_kprobes(); if (!err) err = register_die_notifier(&kprobe_exceptions_nb); @@ -2523,6 +2519,21 @@ static int __init init_kprobes(void) } early_initcall(init_kprobes); +#if defined(CONFIG_OPTPROBES) +static int __init init_optprobes(void) +{ + /* + * Enable kprobe optimization - this kicks the optimizer which + * depends on synchronize_rcu_tasks() and ksoftirqd, that is + * not spawned in early initcall. So delay the optimization. + */ + optimize_all_kprobes(); + + return 0; +} +subsys_initcall(init_optprobes); +#endif + #ifdef CONFIG_DEBUG_FS static void report_probe(struct seq_file *pi, struct kprobe *p, const char *sym, int offset, char *modname, struct kprobe *pp) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index bdaf4829098c..780012eb2f3f 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3707,7 +3707,7 @@ static void print_usage_bug(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit prev_bit, enum lock_usage_bit new_bit) { - if (!debug_locks_off_graph_unlock() || debug_locks_silent) + if (!debug_locks_off() || debug_locks_silent) return; pr_warn("\n"); @@ -3748,6 +3748,7 @@ valid_state(struct task_struct *curr, struct held_lock *this, enum lock_usage_bit new_bit, enum lock_usage_bit bad_bit) { if (unlikely(hlock_class(this)->usage_mask & (1 << bad_bit))) { + graph_unlock(); print_usage_bug(curr, this, bad_bit, new_bit); return 0; } diff --git a/kernel/module.c b/kernel/module.c index 4bf30e4b3eaa..1e5aad812310 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2348,6 +2348,21 @@ static int verify_exported_symbols(struct module *mod) return 0; } +static bool ignore_undef_symbol(Elf_Half emachine, const char *name) +{ + /* + * On x86, PIC code and Clang non-PIC code may have call foo@PLT. GNU as + * before 2.37 produces an unreferenced _GLOBAL_OFFSET_TABLE_ on x86-64. + * i386 has a similar problem but may not deserve a fix. + * + * If we ever have to ignore many symbols, consider refactoring the code to + * only warn if referenced by a relocation. + */ + if (emachine == EM_386 || emachine == EM_X86_64) + return !strcmp(name, "_GLOBAL_OFFSET_TABLE_"); + return false; +} + /* Change all symbols so that st_value encodes the pointer directly. */ static int simplify_symbols(struct module *mod, const struct load_info *info) { @@ -2395,8 +2410,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info) break; } - /* Ok if weak. */ - if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK) + /* Ok if weak or ignored. */ + if (!ksym && + (ELF_ST_BIND(sym[i].st_info) == STB_WEAK || + ignore_undef_symbol(info->hdr->e_machine, name))) break; ret = PTR_ERR(ksym) ?: -ENOENT; @@ -2964,7 +2981,7 @@ static int module_sig_check(struct load_info *info, int flags) } if (is_module_sig_enforced()) { - pr_notice("%s: loading of %s is rejected\n", info->name, reason); + pr_notice("Loading of %s is rejected\n", reason); return -EKEYREJECTED; } @@ -2977,9 +2994,33 @@ static int module_sig_check(struct load_info *info, int flags) } #endif /* !CONFIG_MODULE_SIG */ -/* Sanity checks against invalid binaries, wrong arch, weird elf version. */ -static int elf_header_check(struct load_info *info) +static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr) +{ + unsigned long secend; + + /* + * Check for both overflow and offset/size being + * too large. + */ + secend = shdr->sh_offset + shdr->sh_size; + if (secend < shdr->sh_offset || secend > info->len) + return -ENOEXEC; + + return 0; +} + +/* + * Sanity checks against invalid binaries, wrong arch, weird elf version. + * + * Also do basic validity checks against section offsets and sizes, the + * section name string table, and the indices used for it (sh_name). + */ +static int elf_validity_check(struct load_info *info) { + unsigned int i; + Elf_Shdr *shdr, *strhdr; + int err; + if (info->len < sizeof(*(info->hdr))) return -ENOEXEC; @@ -2989,11 +3030,78 @@ static int elf_header_check(struct load_info *info) || info->hdr->e_shentsize != sizeof(Elf_Shdr)) return -ENOEXEC; + /* + * e_shnum is 16 bits, and sizeof(Elf_Shdr) is + * known and small. So e_shnum * sizeof(Elf_Shdr) + * will not overflow unsigned long on any platform. + */ if (info->hdr->e_shoff >= info->len || (info->hdr->e_shnum * sizeof(Elf_Shdr) > info->len - info->hdr->e_shoff)) return -ENOEXEC; + info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; + + /* + * Verify if the section name table index is valid. + */ + if (info->hdr->e_shstrndx == SHN_UNDEF + || info->hdr->e_shstrndx >= info->hdr->e_shnum) + return -ENOEXEC; + + strhdr = &info->sechdrs[info->hdr->e_shstrndx]; + err = validate_section_offset(info, strhdr); + if (err < 0) + return err; + + /* + * The section name table must be NUL-terminated, as required + * by the spec. This makes strcmp and pr_* calls that access + * strings in the section safe. + */ + info->secstrings = (void *)info->hdr + strhdr->sh_offset; + if (info->secstrings[strhdr->sh_size - 1] != '\0') + return -ENOEXEC; + + /* + * The code assumes that section 0 has a length of zero and + * an addr of zero, so check for it. + */ + if (info->sechdrs[0].sh_type != SHT_NULL + || info->sechdrs[0].sh_size != 0 + || info->sechdrs[0].sh_addr != 0) + return -ENOEXEC; + + for (i = 1; i < info->hdr->e_shnum; i++) { + shdr = &info->sechdrs[i]; + switch (shdr->sh_type) { + case SHT_NULL: + case SHT_NOBITS: + continue; + case SHT_SYMTAB: + if (shdr->sh_link == SHN_UNDEF + || shdr->sh_link >= info->hdr->e_shnum) + return -ENOEXEC; + fallthrough; + default: + err = validate_section_offset(info, shdr); + if (err < 0) { + pr_err("Invalid ELF section in module (section %u type %u)\n", + i, shdr->sh_type); + return err; + } + + if (shdr->sh_flags & SHF_ALLOC) { + if (shdr->sh_name >= strhdr->sh_size) { + pr_err("Invalid ELF section name in module (section %u type %u)\n", + i, shdr->sh_type); + return -ENOEXEC; + } + } + break; + } + } + return 0; } @@ -3095,11 +3203,6 @@ static int rewrite_section_headers(struct load_info *info, int flags) for (i = 1; i < info->hdr->e_shnum; i++) { Elf_Shdr *shdr = &info->sechdrs[i]; - if (shdr->sh_type != SHT_NOBITS - && info->len < shdr->sh_offset + shdr->sh_size) { - pr_err("Module len %lu truncated\n", info->len); - return -ENOEXEC; - } /* * Mark all sections sh_addr with their address in the @@ -3133,11 +3236,6 @@ static int setup_load_info(struct load_info *info, int flags) { unsigned int i; - /* Set up the convenience variables */ - info->sechdrs = (void *)info->hdr + info->hdr->e_shoff; - info->secstrings = (void *)info->hdr - + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - /* Try to find a name early so we can log errors with a module name */ info->index.info = find_sec(info, ".modinfo"); if (info->index.info) @@ -3894,26 +3992,50 @@ static int load_module(struct load_info *info, const char __user *uargs, long err = 0; char *after_dashes; - err = elf_header_check(info); + /* + * Do the signature check (if any) first. All that + * the signature check needs is info->len, it does + * not need any of the section info. That can be + * set up later. This will minimize the chances + * of a corrupt module causing problems before + * we even get to the signature check. + * + * The check will also adjust info->len by stripping + * off the sig length at the end of the module, making + * checks against info->len more correct. + */ + err = module_sig_check(info, flags); + if (err) + goto free_copy; + + /* + * Do basic sanity checks against the ELF header and + * sections. + */ + err = elf_validity_check(info); if (err) { - pr_err("Module has invalid ELF header\n"); + pr_err("Module has invalid ELF structures\n"); goto free_copy; } + /* + * Everything checks out, so set up the section info + * in the info structure. + */ err = setup_load_info(info, flags); if (err) goto free_copy; + /* + * Now that we know we have the correct module name, check + * if it's blacklisted. + */ if (blacklisted(info->name)) { err = -EPERM; pr_err("Module %s is blacklisted\n", info->name); goto free_copy; } - err = module_sig_check(info, flags); - if (err) - goto free_copy; - err = rewrite_section_headers(info, flags); if (err) goto free_copy; diff --git a/kernel/module_signature.c b/kernel/module_signature.c index 4224a1086b7d..00132d12487c 100644 --- a/kernel/module_signature.c +++ b/kernel/module_signature.c @@ -25,7 +25,7 @@ int mod_check_sig(const struct module_signature *ms, size_t file_len, return -EBADMSG; if (ms->id_type != PKEY_ID_PKCS7) { - pr_err("%s: Module is not signed with expected PKCS#7 message\n", + pr_err("%s: not signed with expected PKCS#7 message\n", name); return -ENOPKG; } diff --git a/kernel/module_signing.c b/kernel/module_signing.c index 9d9fc678c91d..8723ae70ea1f 100644 --- a/kernel/module_signing.c +++ b/kernel/module_signing.c @@ -30,7 +30,7 @@ int mod_verify_sig(const void *mod, struct load_info *info) memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms)); - ret = mod_check_sig(&ms, modlen, info->name); + ret = mod_check_sig(&ms, modlen, "module"); if (ret) return ret; diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 1358fa4abfa8..0f4530b3a8cd 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -98,7 +98,7 @@ static int __init em_debug_init(void) return 0; } -core_initcall(em_debug_init); +fs_initcall(em_debug_init); #else /* CONFIG_DEBUG_FS */ static void em_debug_create_pd(struct device *dev) {} static void em_debug_remove_pd(struct device *dev) {} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 5a95c688621f..575a34b88936 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -735,9 +735,9 @@ static ssize_t devkmsg_read(struct file *file, char __user *buf, logbuf_lock_irq(); } - if (user->seq < prb_first_valid_seq(prb)) { + if (r->info->seq != user->seq) { /* our last seen message is gone, return error and reset */ - user->seq = prb_first_valid_seq(prb); + user->seq = r->info->seq; ret = -EPIPE; logbuf_unlock_irq(); goto out; @@ -812,6 +812,7 @@ static loff_t devkmsg_llseek(struct file *file, loff_t offset, int whence) static __poll_t devkmsg_poll(struct file *file, poll_table *wait) { struct devkmsg_user *user = file->private_data; + struct printk_info info; __poll_t ret = 0; if (!user) @@ -820,9 +821,9 @@ static __poll_t devkmsg_poll(struct file *file, poll_table *wait) poll_wait(file, &log_wait, wait); logbuf_lock_irq(); - if (prb_read_valid(prb, user->seq, NULL)) { + if (prb_read_valid_info(prb, user->seq, &info, NULL)) { /* return error when data has vanished underneath us */ - if (user->seq < prb_first_valid_seq(prb)) + if (info.seq != user->seq) ret = EPOLLIN|EPOLLRDNORM|EPOLLERR|EPOLLPRI; else ret = EPOLLIN|EPOLLRDNORM; @@ -1559,6 +1560,7 @@ static void syslog_clear(void) int do_syslog(int type, char __user *buf, int len, int source) { + struct printk_info info; bool clear = false; static int saved_console_loglevel = LOGLEVEL_DEFAULT; int error; @@ -1629,9 +1631,14 @@ int do_syslog(int type, char __user *buf, int len, int source) /* Number of chars in the log buffer */ case SYSLOG_ACTION_SIZE_UNREAD: logbuf_lock_irq(); - if (syslog_seq < prb_first_valid_seq(prb)) { + if (!prb_read_valid_info(prb, syslog_seq, &info, NULL)) { + /* No unread messages. */ + logbuf_unlock_irq(); + return 0; + } + if (info.seq != syslog_seq) { /* messages are gone, move to first one */ - syslog_seq = prb_first_valid_seq(prb); + syslog_seq = info.seq; syslog_partial = 0; } if (source == SYSLOG_FROM_PROC) { @@ -1643,7 +1650,6 @@ int do_syslog(int type, char __user *buf, int len, int source) error = prb_next_seq(prb) - syslog_seq; } else { bool time = syslog_partial ? syslog_time : printk_time; - struct printk_info info; unsigned int line_count; u64 seq; @@ -3429,9 +3435,11 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog, goto out; logbuf_lock_irqsave(flags); - if (dumper->cur_seq < prb_first_valid_seq(prb)) { - /* messages are gone, move to first available one */ - dumper->cur_seq = prb_first_valid_seq(prb); + if (prb_read_valid_info(prb, dumper->cur_seq, &info, NULL)) { + if (info.seq != dumper->cur_seq) { + /* messages are gone, move to first available one */ + dumper->cur_seq = info.seq; + } } /* last entry */ diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a0e6f746de6c..2e9e3ed7d63e 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -45,6 +45,8 @@ struct printk_safe_seq_buf { static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); static DEFINE_PER_CPU(int, printk_context); +static DEFINE_RAW_SPINLOCK(safe_read_lock); + #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); #endif @@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s) */ static void __printk_safe_flush(struct irq_work *work) { - static raw_spinlock_t read_lock = - __RAW_SPIN_LOCK_INITIALIZER(read_lock); struct printk_safe_seq_buf *s = container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; @@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work) * different CPUs. This is especially important when printing * a backtrace. */ - raw_spin_lock_irqsave(&read_lock, flags); + raw_spin_lock_irqsave(&safe_read_lock, flags); i = 0; more: @@ -232,7 +232,7 @@ static void __printk_safe_flush(struct irq_work *work) out: report_message_lost(s); - raw_spin_unlock_irqrestore(&read_lock, flags); + raw_spin_unlock_irqrestore(&safe_read_lock, flags); } /** @@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } + if (raw_spin_is_locked(&safe_read_lock)) { + if (num_online_cpus() > 1) + return; + + debug_locks_off(); + raw_spin_lock_init(&safe_read_lock); + } + printk_safe_flush(); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 40e5e3dd253e..ce17b8477442 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -644,7 +644,6 @@ static noinstr void rcu_eqs_enter(bool user) trace_rcu_dyntick(TPS("Start"), rdp->dynticks_nesting, 0, atomic_read(&rdp->dynticks)); WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current)); rdp = this_cpu_ptr(&rcu_data); - do_nocb_deferred_wakeup(rdp); rcu_prepare_for_idle(); rcu_preempt_deferred_qs(current); @@ -678,6 +677,50 @@ void rcu_idle_enter(void) EXPORT_SYMBOL_GPL(rcu_idle_enter); #ifdef CONFIG_NO_HZ_FULL + +#if !defined(CONFIG_GENERIC_ENTRY) || !defined(CONFIG_KVM_XFER_TO_GUEST_WORK) +/* + * An empty function that will trigger a reschedule on + * IRQ tail once IRQs get re-enabled on userspace/guest resume. + */ +static void late_wakeup_func(struct irq_work *work) +{ +} + +static DEFINE_PER_CPU(struct irq_work, late_wakeup_work) = + IRQ_WORK_INIT(late_wakeup_func); + +/* + * If either: + * + * 1) the task is about to enter in guest mode and $ARCH doesn't support KVM generic work + * 2) the task is about to enter in user mode and $ARCH doesn't support generic entry. + * + * In these cases the late RCU wake ups aren't supported in the resched loops and our + * last resort is to fire a local irq_work that will trigger a reschedule once IRQs + * get re-enabled again. + */ +noinstr static void rcu_irq_work_resched(void) +{ + struct rcu_data *rdp = this_cpu_ptr(&rcu_data); + + if (IS_ENABLED(CONFIG_GENERIC_ENTRY) && !(current->flags & PF_VCPU)) + return; + + if (IS_ENABLED(CONFIG_KVM_XFER_TO_GUEST_WORK) && (current->flags & PF_VCPU)) + return; + + instrumentation_begin(); + if (do_nocb_deferred_wakeup(rdp) && need_resched()) { + irq_work_queue(this_cpu_ptr(&late_wakeup_work)); + } + instrumentation_end(); +} + +#else +static inline void rcu_irq_work_resched(void) { } +#endif + /** * rcu_user_enter - inform RCU that we are resuming userspace. * @@ -692,8 +735,16 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter); noinstr void rcu_user_enter(void) { lockdep_assert_irqs_disabled(); + + /* + * Other than generic entry implementation, we may be past the last + * rescheduling opportunity in the entry code. Trigger a self IPI + * that will fire and reschedule once we resume in user/guest mode. + */ + rcu_irq_work_resched(); rcu_eqs_enter(true); } + #endif /* CONFIG_NO_HZ_FULL */ /** diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 7708ed161f4a..9226f4021a36 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -433,7 +433,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, unsigned long flags); static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp); -static void do_nocb_deferred_wakeup(struct rcu_data *rdp); +static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); static void rcu_spawn_cpu_nocb_kthread(int cpu); static void __init rcu_spawn_nocb_kthreads(void); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7e291ce0a1d6..cdc1b7651c03 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1631,8 +1631,8 @@ bool rcu_is_nocb_cpu(int cpu) * Kick the GP kthread for this NOCB group. Caller holds ->nocb_lock * and this function releases it. */ -static void wake_nocb_gp(struct rcu_data *rdp, bool force, - unsigned long flags) +static bool wake_nocb_gp(struct rcu_data *rdp, bool force, + unsigned long flags) __releases(rdp->nocb_lock) { bool needwake = false; @@ -1643,7 +1643,7 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("AlreadyAwake")); rcu_nocb_unlock_irqrestore(rdp, flags); - return; + return false; } del_timer(&rdp->nocb_timer); rcu_nocb_unlock_irqrestore(rdp, flags); @@ -1656,6 +1656,8 @@ static void wake_nocb_gp(struct rcu_data *rdp, bool force, raw_spin_unlock_irqrestore(&rdp_gp->nocb_gp_lock, flags); if (needwake) wake_up_process(rdp_gp->nocb_gp_kthread); + + return needwake; } /* @@ -2152,20 +2154,23 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) } /* Do a deferred wakeup of rcu_nocb_kthread(). */ -static void do_nocb_deferred_wakeup_common(struct rcu_data *rdp) +static bool do_nocb_deferred_wakeup_common(struct rcu_data *rdp) { unsigned long flags; int ndw; + int ret; rcu_nocb_lock_irqsave(rdp, flags); if (!rcu_nocb_need_deferred_wakeup(rdp)) { rcu_nocb_unlock_irqrestore(rdp, flags); - return; + return false; } ndw = READ_ONCE(rdp->nocb_defer_wakeup); WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOCB_WAKE_NOT); - wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); + ret = wake_nocb_gp(rdp, ndw == RCU_NOCB_WAKE_FORCE, flags); trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("DeferredWake")); + + return ret; } /* Do a deferred wakeup of rcu_nocb_kthread() from a timer handler. */ @@ -2181,12 +2186,19 @@ static void do_nocb_deferred_wakeup_timer(struct timer_list *t) * This means we do an inexact common-case check. Note that if * we miss, ->nocb_timer will eventually clean things up. */ -static void do_nocb_deferred_wakeup(struct rcu_data *rdp) +static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) { if (rcu_nocb_need_deferred_wakeup(rdp)) - do_nocb_deferred_wakeup_common(rdp); + return do_nocb_deferred_wakeup_common(rdp); + return false; } +void rcu_nocb_flush_deferred_wakeup(void) +{ + do_nocb_deferred_wakeup(this_cpu_ptr(&rcu_data)); +} +EXPORT_SYMBOL_GPL(rcu_nocb_flush_deferred_wakeup); + void __init rcu_init_nohz(void) { int cpu; @@ -2518,8 +2530,9 @@ static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp) return false; } -static void do_nocb_deferred_wakeup(struct rcu_data *rdp) +static bool do_nocb_deferred_wakeup(struct rcu_data *rdp) { + return false; } static void rcu_spawn_cpu_nocb_kthread(int cpu) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ff74fca39ed2..f0056507a373 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -355,8 +355,9 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer) static void __hrtick_restart(struct rq *rq) { struct hrtimer *timer = &rq->hrtick_timer; + ktime_t time = rq->hrtick_time; - hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED_HARD); + hrtimer_start(timer, time, HRTIMER_MODE_ABS_PINNED_HARD); } /* @@ -380,7 +381,6 @@ static void __hrtick_start(void *arg) void hrtick_start(struct rq *rq, u64 delay) { struct hrtimer *timer = &rq->hrtick_timer; - ktime_t time; s64 delta; /* @@ -388,9 +388,7 @@ void hrtick_start(struct rq *rq, u64 delay) * doesn't make sense and can cause timer DoS. */ delta = max_t(s64, delay, 10000LL); - time = ktime_add_ns(timer->base->get_time(), delta); - - hrtimer_set_expires(timer, time); + rq->hrtick_time = ktime_add_ns(timer->base->get_time(), delta); if (rq == this_rq()) __hrtick_restart(rq); @@ -1864,8 +1862,13 @@ struct migration_arg { struct set_affinity_pending *pending; }; +/* + * @refs: number of wait_for_completion() + * @stop_pending: is @stop_work in use + */ struct set_affinity_pending { refcount_t refs; + unsigned int stop_pending; struct completion done; struct cpu_stop_work stop_work; struct migration_arg arg; @@ -1900,8 +1903,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf, */ static int migration_cpu_stop(void *data) { - struct set_affinity_pending *pending; struct migration_arg *arg = data; + struct set_affinity_pending *pending = arg->pending; struct task_struct *p = arg->task; int dest_cpu = arg->dest_cpu; struct rq *rq = this_rq(); @@ -1923,7 +1926,6 @@ static int migration_cpu_stop(void *data) raw_spin_lock(&p->pi_lock); rq_lock(rq, &rf); - pending = p->migration_pending; /* * If task_rq(p) != rq, it cannot be migrated here, because we're * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because @@ -1934,21 +1936,14 @@ static int migration_cpu_stop(void *data) goto out; if (pending) { - p->migration_pending = NULL; + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; } - /* migrate_enable() -- we must not race against SCA */ if (dest_cpu < 0) { - /* - * When this was migrate_enable() but we no longer - * have a @pending, a concurrent SCA 'fixed' things - * and we should be valid again. Nothing to do. - */ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)); + if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) goto out; - } dest_cpu = cpumask_any_distribute(&p->cpus_mask); } @@ -1958,7 +1953,14 @@ static int migration_cpu_stop(void *data) else p->wake_cpu = dest_cpu; - } else if (dest_cpu < 0 || pending) { + /* + * XXX __migrate_task() can fail, at which point we might end + * up running on a dodgy CPU, AFAICT this can only happen + * during CPU hotplug, at which point we'll get pushed out + * anyway, so it's probably not a big deal. + */ + + } else if (pending) { /* * This happens when we get migrated between migrate_enable()'s * preempt_enable() and scheduling the stopper task. At that @@ -1973,43 +1975,32 @@ static int migration_cpu_stop(void *data) * ->pi_lock, so the allowed mask is stable - if it got * somewhere allowed, we're done. */ - if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { - p->migration_pending = NULL; + if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) { + if (p->migration_pending == pending) + p->migration_pending = NULL; complete = true; goto out; } - /* - * When this was migrate_enable() but we no longer have an - * @pending, a concurrent SCA 'fixed' things and we should be - * valid again. Nothing to do. - */ - if (!pending) { - WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask)); - goto out; - } - /* * When migrate_enable() hits a rq mis-match we can't reliably * determine is_migration_disabled() and so have to chase after * it. */ + WARN_ON_ONCE(!pending->stop_pending); task_rq_unlock(rq, p, &rf); stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop, &pending->arg, &pending->stop_work); return 0; } out: + if (pending) + pending->stop_pending = false; task_rq_unlock(rq, p, &rf); if (complete) complete_all(&pending->done); - /* For pending->{arg,stop_work} */ - pending = arg->pending; - if (pending && refcount_dec_and_test(&pending->refs)) - wake_up_var(&pending->refs); - return 0; } @@ -2196,11 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag int dest_cpu, unsigned int flags) { struct set_affinity_pending my_pending = { }, *pending = NULL; - struct migration_arg arg = { - .task = p, - .dest_cpu = dest_cpu, - }; - bool complete = false; + bool stop_pending, complete = false; /* Can the task run on the task's current CPU? If so, we're done */ if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) { @@ -2212,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag push_task = get_task_struct(p); } + /* + * If there are pending waiters, but no pending stop_work, + * then complete now. + */ pending = p->migration_pending; - if (pending) { - refcount_inc(&pending->refs); + if (pending && !pending->stop_pending) { p->migration_pending = NULL; complete = true; } + task_rq_unlock(rq, p, rf); if (push_task) { @@ -2226,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag } if (complete) - goto do_complete; + complete_all(&pending->done); return 0; } @@ -2237,6 +2228,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag /* Install the request */ refcount_set(&my_pending.refs, 1); init_completion(&my_pending.done); + my_pending.arg = (struct migration_arg) { + .task = p, + .dest_cpu = -1, /* any */ + .pending = &my_pending, + }; + p->migration_pending = &my_pending; } else { pending = p->migration_pending; @@ -2261,45 +2258,41 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag return -EINVAL; } - if (flags & SCA_MIGRATE_ENABLE) { - - refcount_inc(&pending->refs); /* pending->{arg,stop_work} */ - p->migration_flags &= ~MDF_PUSH; - task_rq_unlock(rq, p, rf); - - pending->arg = (struct migration_arg) { - .task = p, - .dest_cpu = -1, - .pending = pending, - }; - - stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, - &pending->arg, &pending->stop_work); - - return 0; - } - if (task_running(rq, p) || p->state == TASK_WAKING) { /* - * Lessen races (and headaches) by delegating - * is_migration_disabled(p) checks to the stopper, which will - * run on the same CPU as said p. + * MIGRATE_ENABLE gets here because 'p == current', but for + * anything else we cannot do is_migration_disabled(), punt + * and have the stopper function handle it all race-free. */ + stop_pending = pending->stop_pending; + if (!stop_pending) + pending->stop_pending = true; + + if (flags & SCA_MIGRATE_ENABLE) + p->migration_flags &= ~MDF_PUSH; + task_rq_unlock(rq, p, rf); - stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg); + if (!stop_pending) { + stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop, + &pending->arg, &pending->stop_work); + } + + if (flags & SCA_MIGRATE_ENABLE) + return 0; } else { if (!is_migration_disabled(p)) { if (task_on_rq_queued(p)) rq = move_queued_task(rq, rf, p, dest_cpu); - p->migration_pending = NULL; - complete = true; + if (!pending->stop_pending) { + p->migration_pending = NULL; + complete = true; + } } task_rq_unlock(rq, p, rf); -do_complete: if (complete) complete_all(&pending->done); } @@ -2307,7 +2300,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag wait_for_completion(&pending->done); if (refcount_dec_and_test(&pending->refs)) - wake_up_var(&pending->refs); + wake_up_var(&pending->refs); /* No UaF, just an address */ /* * Block the original owner of &pending until all subsequent callers @@ -2315,6 +2308,9 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag */ wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs)); + /* ARGH */ + WARN_ON_ONCE(my_pending.stop_pending); + return 0; } @@ -3478,7 +3474,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) /** * try_invoke_on_locked_down_task - Invoke a function on task in fixed state - * @p: Process for which the function is to be invoked. + * @p: Process for which the function is to be invoked, can be @current. * @func: Function to invoke. * @arg: Argument to function. * @@ -3496,12 +3492,11 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) */ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct task_struct *t, void *arg), void *arg) { - bool ret = false; struct rq_flags rf; + bool ret = false; struct rq *rq; - lockdep_assert_irqs_enabled(); - raw_spin_lock_irq(&p->pi_lock); + raw_spin_lock_irqsave(&p->pi_lock, rf.flags); if (p->on_rq) { rq = __task_rq_lock(p, &rf); if (task_rq(p) == rq) @@ -3518,7 +3513,7 @@ bool try_invoke_on_locked_down_task(struct task_struct *p, bool (*func)(struct t ret = func(p, arg); } } - raw_spin_unlock_irq(&p->pi_lock); + raw_spin_unlock_irqrestore(&p->pi_lock, rf.flags); return ret; } diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 04a3ce20da67..bbc78794224a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -3943,6 +3943,22 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq, trace_sched_util_est_cfs_tp(cfs_rq); } +static inline void util_est_dequeue(struct cfs_rq *cfs_rq, + struct task_struct *p) +{ + unsigned int enqueued; + + if (!sched_feat(UTIL_EST)) + return; + + /* Update root cfs_rq's estimated utilization */ + enqueued = cfs_rq->avg.util_est.enqueued; + enqueued -= min_t(unsigned int, enqueued, _task_util_est(p)); + WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued); + + trace_sched_util_est_cfs_tp(cfs_rq); +} + /* * Check if a (signed) value is within a specified (unsigned) margin, * based on the observation that: @@ -3956,23 +3972,16 @@ static inline bool within_margin(int value, int margin) return ((unsigned int)(value + margin - 1) < (2 * margin - 1)); } -static void -util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) +static inline void util_est_update(struct cfs_rq *cfs_rq, + struct task_struct *p, + bool task_sleep) { long last_ewma_diff; struct util_est ue; - int cpu; if (!sched_feat(UTIL_EST)) return; - /* Update root cfs_rq's estimated utilization */ - ue.enqueued = cfs_rq->avg.util_est.enqueued; - ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p)); - WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued); - - trace_sched_util_est_cfs_tp(cfs_rq); - /* * Skip update of task's estimated utilization when the task has not * yet completed an activation, e.g. being migrated. @@ -4012,8 +4021,7 @@ util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep) * To avoid overestimation of actual task utilization, skip updates if * we cannot grant there is idle time in this CPU. */ - cpu = cpu_of(rq_of(cfs_rq)); - if (task_util(p) > capacity_orig_of(cpu)) + if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq)))) return; /* @@ -4052,7 +4060,7 @@ static inline void update_misfit_status(struct task_struct *p, struct rq *rq) if (!static_branch_unlikely(&sched_asym_cpucapacity)) return; - if (!p) { + if (!p || p->nr_cpus_allowed == 1) { rq->misfit_task_load = 0; return; } @@ -4096,8 +4104,11 @@ static inline void util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {} static inline void -util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, - bool task_sleep) {} +util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p) {} + +static inline void +util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p, + bool task_sleep) {} static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {} #endif /* CONFIG_SMP */ @@ -5609,6 +5620,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) int idle_h_nr_running = task_has_idle_policy(p); bool was_sched_idle = sched_idle_rq(rq); + util_est_dequeue(&rq->cfs, p); + for_each_sched_entity(se) { cfs_rq = cfs_rq_of(se); dequeue_entity(cfs_rq, se, flags); @@ -5659,7 +5672,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) rq->next_balance = jiffies; dequeue_throttle: - util_est_dequeue(&rq->cfs, p, task_sleep); + util_est_update(&rq->cfs, p, task_sleep); hrtick_update(rq); } diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 305727ea0677..7199e6f23789 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -285,6 +285,7 @@ static void do_idle(void) } arch_cpu_idle_enter(); + rcu_nocb_flush_deferred_wakeup(); /* * In poll mode we reenable interrupts and spin. Also if we diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 08ae45ad9261..f311bf85d211 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -471,9 +471,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm) } rcu_read_unlock(); - preempt_disable(); - smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1); - preempt_enable(); + on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true); free_cpumask_var(tmpmask); cpus_read_unlock(); diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index bb09988451a0..282a6bbaacd7 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1031,6 +1031,7 @@ struct rq { call_single_data_t hrtick_csd; #endif struct hrtimer hrtick_timer; + ktime_t hrtick_time; #endif #ifdef CONFIG_SCHEDSTATS diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 952dc1c90229..63b40d12896b 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1284,6 +1284,8 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, const bool recheck_after_trace) { BUG(); + + return -1; } #endif diff --git a/kernel/smp.c b/kernel/smp.c index 1b6070bf97bb..aeb0adfa0606 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -449,6 +450,9 @@ void flush_smp_call_function_from_idle(void) local_irq_save(flags); flush_smp_call_function_queue(true); + if (local_softirq_pending()) + do_softirq(); + local_irq_restore(flags); } diff --git a/kernel/static_call.c b/kernel/static_call.c index 84565c2a41b8..49efbdc5b480 100644 --- a/kernel/static_call.c +++ b/kernel/static_call.c @@ -12,6 +12,8 @@ extern struct static_call_site __start_static_call_sites[], __stop_static_call_sites[]; +extern struct static_call_tramp_key __start_static_call_tramp_key[], + __stop_static_call_tramp_key[]; static bool static_call_initialized; @@ -33,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site) return (void *)((long)site->addr + (long)&site->addr); } +static inline unsigned long __static_call_key(const struct static_call_site *site) +{ + return (long)site->key + (long)&site->key; +} static inline struct static_call_key *static_call_key(const struct static_call_site *site) { - return (struct static_call_key *) - (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS); + return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS); } /* These assume the key is word-aligned. */ static inline bool static_call_is_init(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT; + return __static_call_key(site) & STATIC_CALL_SITE_INIT; } static inline bool static_call_is_tail(struct static_call_site *site) { - return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL; + return __static_call_key(site) & STATIC_CALL_SITE_TAIL; } static inline void static_call_set_init(struct static_call_site *site) { - site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) - + site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) - (long)&site->key; } @@ -182,13 +187,22 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func) } if (!kernel_text_address((unsigned long)site_addr)) { - WARN_ONCE(1, "can't patch static call site at %pS", + /* + * This skips patching built-in __exit, which + * is part of init_section_contains() but is + * not part of kernel_text_address(). + * + * Skipping built-in __exit is fine since it + * will never be executed. + */ + WARN_ONCE(!static_call_is_init(site), + "can't patch static call site at %pS", site_addr); continue; } arch_static_call_transform(site_addr, NULL, func, - static_call_is_tail(site)); + static_call_is_tail(site)); } } @@ -323,10 +337,60 @@ static int __static_call_mod_text_reserved(void *start, void *end) return ret; } +static unsigned long tramp_key_lookup(unsigned long addr) +{ + struct static_call_tramp_key *start = __start_static_call_tramp_key; + struct static_call_tramp_key *stop = __stop_static_call_tramp_key; + struct static_call_tramp_key *tramp_key; + + for (tramp_key = start; tramp_key != stop; tramp_key++) { + unsigned long tramp; + + tramp = (long)tramp_key->tramp + (long)&tramp_key->tramp; + if (tramp == addr) + return (long)tramp_key->key + (long)&tramp_key->key; + } + + return 0; +} + static int static_call_add_module(struct module *mod) { - return __static_call_init(mod, mod->static_call_sites, - mod->static_call_sites + mod->num_static_call_sites); + struct static_call_site *start = mod->static_call_sites; + struct static_call_site *stop = start + mod->num_static_call_sites; + struct static_call_site *site; + + for (site = start; site != stop; site++) { + unsigned long s_key = __static_call_key(site); + unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS; + unsigned long key; + + /* + * Is the key is exported, 'addr' points to the key, which + * means modules are allowed to call static_call_update() on + * it. + * + * Otherwise, the key isn't exported, and 'addr' points to the + * trampoline so we need to lookup the key. + * + * We go through this dance to prevent crazy modules from + * abusing sensitive static calls. + */ + if (!kernel_text_address(addr)) + continue; + + key = tramp_key_lookup(addr); + if (!key) { + pr_warn("Failed to fixup __raw_static_call() usage at: %ps\n", + static_call_addr(site)); + return -EINVAL; + } + + key |= s_key & STATIC_CALL_SITE_FLAGS; + site->key = key - (long)&site->key; + } + + return __static_call_init(mod, start, stop); } static void static_call_del_module(struct module *mod) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c9fbdd848138..0b17fdb1b1ff 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -103,6 +103,9 @@ #ifdef CONFIG_LOCKUP_DETECTOR #include #endif +#ifdef CONFIG_USER_NS +#include +#endif #if defined(CONFIG_SYSCTL) @@ -111,6 +114,22 @@ static int sixty = 60; #endif +#if defined(CONFIG_UNEVICTABLE_FILE) +#if CONFIG_UNEVICTABLE_FILE_KBYTES_LOW < 0 +#error "CONFIG_UNEVICTABLE_FILE_KBYTES_LOW should be >= 0" +#endif +#if CONFIG_UNEVICTABLE_FILE_KBYTES_MIN < 0 +#error "CONFIG_UNEVICTABLE_FILE_KBYTES_MIN should be >= 0" +#endif +#if CONFIG_UNEVICTABLE_FILE_KBYTES_LOW < CONFIG_UNEVICTABLE_FILE_KBYTES_MIN +#error "CONFIG_UNEVICTABLE_FILE_KBYTES_LOW should be >= CONFIG_UNEVICTABLE_FILE_KBYTES_MIN" +#endif +unsigned long sysctl_unevictable_file_kbytes_low __read_mostly = + CONFIG_UNEVICTABLE_FILE_KBYTES_LOW; +unsigned long sysctl_unevictable_file_kbytes_min __read_mostly = + CONFIG_UNEVICTABLE_FILE_KBYTES_MIN; +#endif + static int __maybe_unused neg_one = -1; static int __maybe_unused two = 2; static int __maybe_unused four = 4; @@ -1902,6 +1921,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_USER_NS + { + .procname = "unprivileged_userns_clone", + .data = &unprivileged_userns_clone, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_PROC_SYSCTL { .procname = "tainted", @@ -2962,7 +2990,7 @@ static struct ctl_table vm_table[] = { .data = &block_dump, .maxlen = sizeof(block_dump), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, { @@ -2970,7 +2998,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_vfs_cache_pressure, .maxlen = sizeof(sysctl_vfs_cache_pressure), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ @@ -2980,7 +3008,7 @@ static struct ctl_table vm_table[] = { .data = &sysctl_legacy_va_layout, .maxlen = sizeof(sysctl_legacy_va_layout), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, #endif @@ -2990,7 +3018,7 @@ static struct ctl_table vm_table[] = { .data = &node_reclaim_mode, .maxlen = sizeof(node_reclaim_mode), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, { @@ -3092,6 +3120,25 @@ static struct ctl_table vm_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, +#endif +#if defined(CONFIG_UNEVICTABLE_FILE) + { + .procname = "unevictable_file_kbytes_low", + .data = &sysctl_unevictable_file_kbytes_low, + .maxlen = sizeof(sysctl_unevictable_file_kbytes_low), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &sysctl_unevictable_file_kbytes_min, + }, + { + .procname = "unevictable_file_kbytes_min", + .data = &sysctl_unevictable_file_kbytes_min, + .maxlen = sizeof(sysctl_unevictable_file_kbytes_min), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &zero_ul, + .extra2 = &sysctl_unevictable_file_kbytes_low, + }, #endif { .procname = "user_reserve_kbytes", diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index f4ace1bf8382..daeaa7140d0a 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -848,9 +848,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags, if (flags == TIMER_ABSTIME) return -ERESTARTNOHAND; - restart->fn = alarm_timer_nsleep_restart; restart->nanosleep.clockid = type; restart->nanosleep.expires = exp; + set_restart_fn(restart, alarm_timer_nsleep_restart); return ret; } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 743c852e10f2..5c9d968187ae 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -546,8 +546,11 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, } /* - * Recomputes cpu_base::*next_timer and returns the earliest expires_next but - * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram. + * Recomputes cpu_base::*next_timer and returns the earliest expires_next + * but does not set cpu_base::*expires_next, that is done by + * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating + * cpu_base::*expires_next right away, reprogramming logic would no longer + * work. * * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, * those timers will get run whenever the softirq gets handled, at the end of @@ -588,6 +591,37 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_ return expires_next; } +static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) +{ + ktime_t expires_next, soft = KTIME_MAX; + + /* + * If the soft interrupt has already been activated, ignore the + * soft bases. They will be handled in the already raised soft + * interrupt. + */ + if (!cpu_base->softirq_activated) { + soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); + /* + * Update the soft expiry time. clock_settime() might have + * affected it. + */ + cpu_base->softirq_expires_next = soft; + } + + expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); + /* + * If a softirq timer is expiring first, update cpu_base->next_timer + * and program the hardware with the soft expiry time. + */ + if (expires_next > soft) { + cpu_base->next_timer = cpu_base->softirq_next_timer; + expires_next = soft; + } + + return expires_next; +} + static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) { ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; @@ -628,23 +662,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) { ktime_t expires_next; - /* - * Find the current next expiration time. - */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); - - if (cpu_base->next_timer && cpu_base->next_timer->is_soft) { - /* - * When the softirq is activated, hrtimer has to be - * programmed with the first hard hrtimer because soft - * timer interrupt could occur too late. - */ - if (cpu_base->softirq_activated) - expires_next = __hrtimer_get_next_event(cpu_base, - HRTIMER_ACTIVE_HARD); - else - cpu_base->softirq_expires_next = expires_next; - } + expires_next = hrtimer_update_next_event(cpu_base); if (skip_equal && expires_next == cpu_base->expires_next) return; @@ -1644,8 +1662,8 @@ void hrtimer_interrupt(struct clock_event_device *dev) __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); - /* Reevaluate the clock bases for the next expiry */ - expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); + /* Reevaluate the clock bases for the [soft] next expiry */ + expires_next = hrtimer_update_next_event(cpu_base); /* * Store the new expiry value so the migration code can verify * against it. @@ -1939,9 +1957,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, } restart = ¤t->restart_block; - restart->fn = hrtimer_nanosleep_restart; restart->nanosleep.clockid = t.timer.base->clockid; restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); + set_restart_fn(restart, hrtimer_nanosleep_restart); out: destroy_hrtimer_on_stack(&t.timer); return ret; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a71758e34e45..9abe15255bc4 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags, if (flags & TIMER_ABSTIME) return -ERESTARTNOHAND; - restart_block->fn = posix_cpu_nsleep_restart; restart_block->nanosleep.clockid = which_clock; + set_restart_fn(restart_block, posix_cpu_nsleep_restart); } return error; } diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 4d8e35575549..b7e29db127fa 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5045,6 +5045,20 @@ struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) return NULL; } +static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr) +{ + struct ftrace_direct_func *direct; + + direct = kmalloc(sizeof(*direct), GFP_KERNEL); + if (!direct) + return NULL; + direct->addr = addr; + direct->count = 0; + list_add_rcu(&direct->next, &ftrace_direct_funcs); + ftrace_direct_func_count++; + return direct; +} + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5120,15 +5134,11 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) direct = ftrace_find_direct_func(addr); if (!direct) { - direct = kmalloc(sizeof(*direct), GFP_KERNEL); + direct = ftrace_alloc_direct_func(addr); if (!direct) { kfree(entry); goto out_unlock; } - direct->addr = addr; - direct->count = 0; - list_add_rcu(&direct->next, &ftrace_direct_funcs); - ftrace_direct_func_count++; } entry->ip = ip; @@ -5329,6 +5339,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry, int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr) { + struct ftrace_direct_func *direct, *new_direct = NULL; struct ftrace_func_entry *entry; struct dyn_ftrace *rec; int ret = -ENODEV; @@ -5344,6 +5355,20 @@ int modify_ftrace_direct(unsigned long ip, if (entry->direct != old_addr) goto out_unlock; + direct = ftrace_find_direct_func(old_addr); + if (WARN_ON(!direct)) + goto out_unlock; + if (direct->count > 1) { + ret = -ENOMEM; + new_direct = ftrace_alloc_direct_func(new_addr); + if (!new_direct) + goto out_unlock; + direct->count--; + new_direct->count++; + } else { + direct->addr = new_addr; + } + /* * If there's no other ftrace callback on the rec->ip location, * then it can be changed directly by the architecture. @@ -5357,6 +5382,14 @@ int modify_ftrace_direct(unsigned long ip, ret = 0; } + if (unlikely(ret && new_direct)) { + direct->count++; + list_del_rcu(&new_direct->next); + synchronize_rcu_tasks(); + kfree(new_direct); + ftrace_direct_func_count--; + } + out_unlock: mutex_unlock(&ftrace_lock); mutex_unlock(&direct_mutex); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index ec08f948dd80..063f8ea6aad9 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2821,6 +2821,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, write_stamp, write_stamp - delta)) return 0; + /* + * It's possible that the event time delta is zero + * (has the same time stamp as the previous event) + * in which case write_stamp and before_stamp could + * be the same. In such a case, force before_stamp + * to be different than write_stamp. It doesn't + * matter what it is, as long as its different. + */ + if (!delta) + rb_time_set(&cpu_buffer->before_stamp, 0); + /* * If an event were to come in now, it would see that the * write_stamp and the before_stamp are different, and assume diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 7261fa0f5e3c..e8f20ae29c18 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -53,6 +53,12 @@ struct tp_probes { struct tracepoint_func probes[]; }; +/* Called in removal of a func but failed to allocate a new tp_funcs */ +static void tp_stub_func(void) +{ + return; +} + static inline void *allocate_probes(int count) { struct tp_probes *p = kmalloc(struct_size(p, probes, count), @@ -131,6 +137,7 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, { struct tracepoint_func *old, *new; int nr_probes = 0; + int stub_funcs = 0; int pos = -1; if (WARN_ON(!tp_func->func)) @@ -147,14 +154,34 @@ func_add(struct tracepoint_func **funcs, struct tracepoint_func *tp_func, if (old[nr_probes].func == tp_func->func && old[nr_probes].data == tp_func->data) return ERR_PTR(-EEXIST); + if (old[nr_probes].func == tp_stub_func) + stub_funcs++; } } - /* + 2 : one for new probe, one for NULL func */ - new = allocate_probes(nr_probes + 2); + /* + 2 : one for new probe, one for NULL func - stub functions */ + new = allocate_probes(nr_probes + 2 - stub_funcs); if (new == NULL) return ERR_PTR(-ENOMEM); if (old) { - if (pos < 0) { + if (stub_funcs) { + /* Need to copy one at a time to remove stubs */ + int probes = 0; + + pos = -1; + for (nr_probes = 0; old[nr_probes].func; nr_probes++) { + if (old[nr_probes].func == tp_stub_func) + continue; + if (pos < 0 && old[nr_probes].prio < prio) + pos = probes++; + new[probes++] = old[nr_probes]; + } + nr_probes = probes; + if (pos < 0) + pos = probes; + else + nr_probes--; /* Account for insertion */ + + } else if (pos < 0) { pos = nr_probes; memcpy(new, old, nr_probes * sizeof(struct tracepoint_func)); } else { @@ -188,8 +215,9 @@ static void *func_remove(struct tracepoint_func **funcs, /* (N -> M), (N > 1, M >= 0) probes */ if (tp_func->func) { for (nr_probes = 0; old[nr_probes].func; nr_probes++) { - if (old[nr_probes].func == tp_func->func && - old[nr_probes].data == tp_func->data) + if ((old[nr_probes].func == tp_func->func && + old[nr_probes].data == tp_func->data) || + old[nr_probes].func == tp_stub_func) nr_del++; } } @@ -208,14 +236,32 @@ static void *func_remove(struct tracepoint_func **funcs, /* N -> M, (N > 1, M > 0) */ /* + 1 for NULL */ new = allocate_probes(nr_probes - nr_del + 1); - if (new == NULL) - return ERR_PTR(-ENOMEM); - for (i = 0; old[i].func; i++) - if (old[i].func != tp_func->func - || old[i].data != tp_func->data) - new[j++] = old[i]; - new[nr_probes - nr_del].func = NULL; - *funcs = new; + if (new) { + for (i = 0; old[i].func; i++) + if ((old[i].func != tp_func->func + || old[i].data != tp_func->data) + && old[i].func != tp_stub_func) + new[j++] = old[i]; + new[nr_probes - nr_del].func = NULL; + *funcs = new; + } else { + /* + * Failed to allocate, replace the old function + * with calls to tp_stub_func. + */ + for (i = 0; old[i].func; i++) + if (old[i].func == tp_func->func && + old[i].data == tp_func->data) { + old[i].func = tp_stub_func; + /* Set the prio to the next event. */ + if (old[i + 1].func) + old[i].prio = + old[i + 1].prio; + else + old[i].prio = -1; + } + *funcs = old; + } } debug_print_probes(*funcs); return old; @@ -295,10 +341,12 @@ static int tracepoint_remove_func(struct tracepoint *tp, tp_funcs = rcu_dereference_protected(tp->funcs, lockdep_is_held(&tracepoints_mutex)); old = func_remove(&tp_funcs, func); - if (IS_ERR(old)) { - WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM); + if (WARN_ON_ONCE(IS_ERR(old))) return PTR_ERR(old); - } + + if (tp_funcs == old) + /* Failed allocating new tp_funcs, replaced func with stub */ + return 0; if (!tp_funcs) { /* Removed last function */ diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index af612945a4d0..95c54dae4aa1 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -21,6 +21,13 @@ #include #include +/* sysctl */ +#ifdef CONFIG_USER_NS_UNPRIVILEGED +int unprivileged_userns_clone = 1; +#else +int unprivileged_userns_clone; +#endif + static struct kmem_cache *user_ns_cachep __read_mostly; static DEFINE_MUTEX(userns_state_mutex); diff --git a/kernel/usermode_driver.c b/kernel/usermode_driver.c index 0b35212ffc3d..bb7bb3b478ab 100644 --- a/kernel/usermode_driver.c +++ b/kernel/usermode_driver.c @@ -139,13 +139,22 @@ static void umd_cleanup(struct subprocess_info *info) struct umd_info *umd_info = info->data; /* cleanup if umh_setup() was successful but exec failed */ - if (info->retval) { - fput(umd_info->pipe_to_umh); - fput(umd_info->pipe_from_umh); - put_pid(umd_info->tgid); - umd_info->tgid = NULL; - } + if (info->retval) + umd_cleanup_helper(umd_info); +} + +/** + * umd_cleanup_helper - release the resources which were allocated in umd_setup + * @info: information about usermode driver + */ +void umd_cleanup_helper(struct umd_info *info) +{ + fput(info->pipe_to_umh); + fput(info->pipe_from_umh); + put_pid(info->tgid); + info->tgid = NULL; } +EXPORT_SYMBOL_GPL(umd_cleanup_helper); /** * fork_usermode_driver - fork a usermode driver diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index f5fa4ba126bf..0d3b7940cf43 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE config KASAN_STACK int + depends on KASAN_GENERIC || KASAN_SW_TAGS default 1 if KASAN_STACK_ENABLE || CC_IS_GCC default 0 diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c index 790abc472f5b..6e5ecfba0a8d 100644 --- a/lib/decompress_unzstd.c +++ b/lib/decompress_unzstd.c @@ -68,11 +68,7 @@ #ifdef STATIC # define UNZSTD_PREBOOT # include "xxhash.c" -# include "zstd/entropy_common.c" -# include "zstd/fse_decompress.c" -# include "zstd/huf_decompress.c" -# include "zstd/zstd_common.c" -# include "zstd/decompress.c" +# include "zstd/decompress_sources.h" #endif #include @@ -91,11 +87,15 @@ static int INIT handle_zstd_error(size_t ret, void (*error)(char *x)) { - const int err = ZSTD_getErrorCode(ret); + const zstd_error_code err = zstd_get_error_code(ret); - if (!ZSTD_isError(ret)) + if (!zstd_is_error(ret)) return 0; + /* + * zstd_get_error_name() cannot be used because error takes a char * + * not a const char * + */ switch (err) { case ZSTD_error_memory_allocation: error("ZSTD decompressor ran out of memory"); @@ -124,28 +124,28 @@ static int INIT decompress_single(const u8 *in_buf, long in_len, u8 *out_buf, long out_len, long *in_pos, void (*error)(char *x)) { - const size_t wksp_size = ZSTD_DCtxWorkspaceBound(); + const size_t wksp_size = zstd_dctx_workspace_bound(); void *wksp = large_malloc(wksp_size); - ZSTD_DCtx *dctx = ZSTD_initDCtx(wksp, wksp_size); + zstd_dctx *dctx = zstd_init_dctx(wksp, wksp_size); int err; size_t ret; if (dctx == NULL) { - error("Out of memory while allocating ZSTD_DCtx"); + error("Out of memory while allocating zstd_dctx"); err = -1; goto out; } /* * Find out how large the frame actually is, there may be junk at - * the end of the frame that ZSTD_decompressDCtx() can't handle. + * the end of the frame that zstd_decompress_dctx() can't handle. */ - ret = ZSTD_findFrameCompressedSize(in_buf, in_len); + ret = zstd_find_frame_compressed_size(in_buf, in_len); err = handle_zstd_error(ret, error); if (err) goto out; in_len = (long)ret; - ret = ZSTD_decompressDCtx(dctx, out_buf, out_len, in_buf, in_len); + ret = zstd_decompress_dctx(dctx, out_buf, out_len, in_buf, in_len); err = handle_zstd_error(ret, error); if (err) goto out; @@ -167,14 +167,14 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, long *in_pos, void (*error)(char *x)) { - ZSTD_inBuffer in; - ZSTD_outBuffer out; - ZSTD_frameParams params; + zstd_in_buffer in; + zstd_out_buffer out; + zstd_frame_header header; void *in_allocated = NULL; void *out_allocated = NULL; void *wksp = NULL; size_t wksp_size; - ZSTD_DStream *dstream; + zstd_dstream *dstream; int err; size_t ret; @@ -238,13 +238,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, out.size = out_len; /* - * We need to know the window size to allocate the ZSTD_DStream. + * We need to know the window size to allocate the zstd_dstream. * Since we are streaming, we need to allocate a buffer for the sliding * window. The window size varies from 1 KB to ZSTD_WINDOWSIZE_MAX * (8 MB), so it is important to use the actual value so as not to * waste memory when it is smaller. */ - ret = ZSTD_getFrameParams(¶ms, in.src, in.size); + ret = zstd_get_frame_header(&header, in.src, in.size); err = handle_zstd_error(ret, error); if (err) goto out; @@ -253,19 +253,19 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, err = -1; goto out; } - if (params.windowSize > ZSTD_WINDOWSIZE_MAX) { + if (header.windowSize > ZSTD_WINDOWSIZE_MAX) { error("ZSTD-compressed data has too large a window size"); err = -1; goto out; } /* - * Allocate the ZSTD_DStream now that we know how much memory is + * Allocate the zstd_dstream now that we know how much memory is * required. */ - wksp_size = ZSTD_DStreamWorkspaceBound(params.windowSize); + wksp_size = zstd_dstream_workspace_bound(header.windowSize); wksp = large_malloc(wksp_size); - dstream = ZSTD_initDStream(params.windowSize, wksp, wksp_size); + dstream = zstd_init_dstream(header.windowSize, wksp, wksp_size); if (dstream == NULL) { error("Out of memory while allocating ZSTD_DStream"); err = -1; @@ -298,7 +298,7 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, in.size = in_len; } /* Returns zero when the frame is complete. */ - ret = ZSTD_decompressStream(dstream, &out, &in); + ret = zstd_decompress_stream(dstream, &out, &in); err = handle_zstd_error(ret, error); if (err) goto out; diff --git a/lib/logic_pio.c b/lib/logic_pio.c index f32fe481b492..07b4b9a1f54b 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -28,6 +28,8 @@ static DEFINE_MUTEX(io_range_mutex); * @new_range: pointer to the IO range to be registered. * * Returns 0 on success, the error code in case of failure. + * If the range already exists, -EEXIST will be returned, which should be + * considered a success. * * Register a new IO range node in the IO range list. */ @@ -51,6 +53,7 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range) list_for_each_entry(range, &io_range_list, list) { if (range->fwnode == new_range->fwnode) { /* range already there */ + ret = -EEXIST; goto end_register; } if (range->flags == LOGIC_PIO_CPU_MMIO && diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 2947274cc2d3..5a2f104ca13f 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -737,13 +737,13 @@ static void kasan_bitops_tags(struct kunit *test) return; } - /* Allocation size will be rounded to up granule size, which is 16. */ - bits = kzalloc(sizeof(*bits), GFP_KERNEL); + /* kmalloc-64 cache will be used and the last 16 bytes will be the redzone. */ + bits = kzalloc(48, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - /* Do the accesses past the 16 allocated bytes. */ - kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]); - kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]); + /* Do the accesses past the 48 allocated bytes, but within the redone. */ + kasan_bitops_modify(test, BITS_PER_LONG, (void *)bits + 48); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, (void *)bits + 48); kfree(bits); } diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile index f5d778e7e5c7..19485e3cc7c9 100644 --- a/lib/zstd/Makefile +++ b/lib/zstd/Makefile @@ -1,10 +1,46 @@ # SPDX-License-Identifier: GPL-2.0-only +# ################################################################ +# Copyright (c) Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under both the BSD-style license (found in the +# LICENSE file in the root directory of this source tree) and the GPLv2 (found +# in the COPYING file in the root directory of this source tree). +# You may select, at your option, one of the above-listed licenses. +# ################################################################ obj-$(CONFIG_ZSTD_COMPRESS) += zstd_compress.o obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o ccflags-y += -O3 -zstd_compress-y := fse_compress.o huf_compress.o compress.o \ - entropy_common.o fse_decompress.o zstd_common.o -zstd_decompress-y := huf_decompress.o decompress.o \ - entropy_common.o fse_decompress.o zstd_common.o +zstd_compress-y := \ + zstd_compress_module.o \ + common/debug.o \ + common/entropy_common.o \ + common/error_private.o \ + common/fse_decompress.o \ + common/zstd_common.o \ + compress/fse_compress.o \ + compress/hist.o \ + compress/huf_compress.o \ + compress/zstd_compress.o \ + compress/zstd_compress_literals.o \ + compress/zstd_compress_sequences.o \ + compress/zstd_compress_superblock.o \ + compress/zstd_double_fast.o \ + compress/zstd_fast.o \ + compress/zstd_lazy.o \ + compress/zstd_ldm.o \ + compress/zstd_opt.o \ + +zstd_decompress-y := \ + zstd_decompress_module.o \ + common/debug.o \ + common/entropy_common.o \ + common/error_private.o \ + common/fse_decompress.o \ + common/zstd_common.o \ + decompress/huf_decompress.o \ + decompress/zstd_ddict.o \ + decompress/zstd_decompress.o \ + decompress/zstd_decompress_block.o \ diff --git a/lib/zstd/bitstream.h b/lib/zstd/bitstream.h deleted file mode 100644 index 5d6343c1a909..000000000000 --- a/lib/zstd/bitstream.h +++ /dev/null @@ -1,380 +0,0 @@ -/* - * bitstream - * Part of FSE library - * header file (to include) - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ -#ifndef BITSTREAM_H_MODULE -#define BITSTREAM_H_MODULE - -/* -* This API consists of small unitary functions, which must be inlined for best performance. -* Since link-time-optimization is not available for all compilers, -* these functions are defined into a .h to be included. -*/ - -/*-**************************************** -* Dependencies -******************************************/ -#include "error_private.h" /* error codes and messages */ -#include "mem.h" /* unaligned access routines */ - -/*========================================= -* Target specific -=========================================*/ -#define STREAM_ACCUMULATOR_MIN_32 25 -#define STREAM_ACCUMULATOR_MIN_64 57 -#define STREAM_ACCUMULATOR_MIN ((U32)(ZSTD_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) - -/*-****************************************** -* bitStream encoding API (write forward) -********************************************/ -/* bitStream can mix input from multiple sources. -* A critical property of these streams is that they encode and decode in **reverse** direction. -* So the first bit sequence you add will be the last to be read, like a LIFO stack. -*/ -typedef struct { - size_t bitContainer; - int bitPos; - char *startPtr; - char *ptr; - char *endPtr; -} BIT_CStream_t; - -ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *dstBuffer, size_t dstCapacity); -ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC); -ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC); - -/* Start with initCStream, providing the size of buffer to write into. -* bitStream will never write outside of this buffer. -* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. -* -* bits are first added to a local register. -* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. -* Writing data into memory is an explicit operation, performed by the flushBits function. -* Hence keep track how many bits are potentially stored into local register to avoid register overflow. -* After a flushBits, a maximum of 7 bits might still be stored into local register. -* -* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. -* -* Last operation is to close the bitStream. -* The function returns the final size of CStream in bytes. -* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) -*/ - -/*-******************************************** -* bitStream decoding API (read backward) -**********************************************/ -typedef struct { - size_t bitContainer; - unsigned bitsConsumed; - const char *ptr; - const char *start; -} BIT_DStream_t; - -typedef enum { - BIT_DStream_unfinished = 0, - BIT_DStream_endOfBuffer = 1, - BIT_DStream_completed = 2, - BIT_DStream_overflow = 3 -} BIT_DStream_status; /* result of BIT_reloadDStream() */ -/* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ - -ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize); -ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, unsigned nbBits); -ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD); -ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *bitD); - -/* Start by invoking BIT_initDStream(). -* A chunk of the bitStream is then stored into a local register. -* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). -* You can then retrieve bitFields stored into the local register, **in reverse order**. -* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. -* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. -* Otherwise, it can be less than that, so proceed accordingly. -* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). -*/ - -/*-**************************************** -* unsafe API -******************************************/ -ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits); -/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ - -ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC); -/* unsafe version; does not check buffer overflow */ - -ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, unsigned nbBits); -/* faster, but works only if nbBits >= 1 */ - -/*-************************************************************** -* Internal functions -****************************************************************/ -ZSTD_STATIC unsigned BIT_highbit32(register U32 val) { return 31 - __builtin_clz(val); } - -/*===== Local Constants =====*/ -static const unsigned BIT_mask[] = {0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, - 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, - 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF}; /* up to 26 bits */ - -/*-************************************************************** -* bitStream encoding -****************************************************************/ -/*! BIT_initCStream() : - * `dstCapacity` must be > sizeof(void*) - * @return : 0 if success, - otherwise an error code (can be tested using ERR_isError() ) */ -ZSTD_STATIC size_t BIT_initCStream(BIT_CStream_t *bitC, void *startPtr, size_t dstCapacity) -{ - bitC->bitContainer = 0; - bitC->bitPos = 0; - bitC->startPtr = (char *)startPtr; - bitC->ptr = bitC->startPtr; - bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr); - if (dstCapacity <= sizeof(bitC->ptr)) - return ERROR(dstSize_tooSmall); - return 0; -} - -/*! BIT_addBits() : - can add up to 26 bits into `bitC`. - Does not check for register overflow ! */ -ZSTD_STATIC void BIT_addBits(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_addBitsFast() : - * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */ -ZSTD_STATIC void BIT_addBitsFast(BIT_CStream_t *bitC, size_t value, unsigned nbBits) -{ - bitC->bitContainer |= value << bitC->bitPos; - bitC->bitPos += nbBits; -} - -/*! BIT_flushBitsFast() : - * unsafe version; does not check buffer overflow */ -ZSTD_STATIC void BIT_flushBitsFast(BIT_CStream_t *bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_flushBits() : - * safe version; check for buffer overflow, and prevents it. - * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */ -ZSTD_STATIC void BIT_flushBits(BIT_CStream_t *bitC) -{ - size_t const nbBytes = bitC->bitPos >> 3; - ZSTD_writeLEST(bitC->ptr, bitC->bitContainer); - bitC->ptr += nbBytes; - if (bitC->ptr > bitC->endPtr) - bitC->ptr = bitC->endPtr; - bitC->bitPos &= 7; - bitC->bitContainer >>= nbBytes * 8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */ -} - -/*! BIT_closeCStream() : - * @return : size of CStream, in bytes, - or 0 if it could not fit into dstBuffer */ -ZSTD_STATIC size_t BIT_closeCStream(BIT_CStream_t *bitC) -{ - BIT_addBitsFast(bitC, 1, 1); /* endMark */ - BIT_flushBits(bitC); - - if (bitC->ptr >= bitC->endPtr) - return 0; /* doesn't fit within authorized budget : cancel */ - - return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); -} - -/*-******************************************************** -* bitStream decoding -**********************************************************/ -/*! BIT_initDStream() : -* Initialize a BIT_DStream_t. -* `bitD` : a pointer to an already allocated BIT_DStream_t structure. -* `srcSize` must be the *exact* size of the bitStream, in bytes. -* @return : size of stream (== srcSize) or an errorCode if a problem is detected -*/ -ZSTD_STATIC size_t BIT_initDStream(BIT_DStream_t *bitD, const void *srcBuffer, size_t srcSize) -{ - if (srcSize < 1) { - memset(bitD, 0, sizeof(*bitD)); - return ERROR(srcSize_wrong); - } - - if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ - bitD->start = (const char *)srcBuffer; - bitD->ptr = (const char *)srcBuffer + srcSize - sizeof(bitD->bitContainer); - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); - { - BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ - if (lastByte == 0) - return ERROR(GENERIC); /* endMark not present */ - } - } else { - bitD->start = (const char *)srcBuffer; - bitD->ptr = bitD->start; - bitD->bitContainer = *(const BYTE *)(bitD->start); - switch (srcSize) { - case 7: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[6]) << (sizeof(bitD->bitContainer) * 8 - 16); - fallthrough; - case 6: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[5]) << (sizeof(bitD->bitContainer) * 8 - 24); - fallthrough; - case 5: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[4]) << (sizeof(bitD->bitContainer) * 8 - 32); - fallthrough; - case 4: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[3]) << 24; - fallthrough; - case 3: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[2]) << 16; - fallthrough; - case 2: bitD->bitContainer += (size_t)(((const BYTE *)(srcBuffer))[1]) << 8; - fallthrough; - default:; - } - { - BYTE const lastByte = ((const BYTE *)srcBuffer)[srcSize - 1]; - bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; - if (lastByte == 0) - return ERROR(GENERIC); /* endMark not present */ - } - bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize) * 8; - } - - return srcSize; -} - -ZSTD_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start) { return bitContainer >> start; } - -ZSTD_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) { return (bitContainer >> start) & BIT_mask[nbBits]; } - -ZSTD_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) { return bitContainer & BIT_mask[nbBits]; } - -/*! BIT_lookBits() : - * Provides next n bits from local register. - * local register is not modified. - * On 32-bits, maxNbBits==24. - * On 64-bits, maxNbBits==56. - * @return : value extracted - */ -ZSTD_STATIC size_t BIT_lookBits(const BIT_DStream_t *bitD, U32 nbBits) -{ - U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; - return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask - nbBits) & bitMask); -} - -/*! BIT_lookBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -ZSTD_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t *bitD, U32 nbBits) -{ - U32 const bitMask = sizeof(bitD->bitContainer) * 8 - 1; - return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask + 1) - nbBits) & bitMask); -} - -ZSTD_STATIC void BIT_skipBits(BIT_DStream_t *bitD, U32 nbBits) { bitD->bitsConsumed += nbBits; } - -/*! BIT_readBits() : - * Read (consume) next n bits from local register and update. - * Pay attention to not read more than nbBits contained into local register. - * @return : extracted value. - */ -ZSTD_STATIC size_t BIT_readBits(BIT_DStream_t *bitD, U32 nbBits) -{ - size_t const value = BIT_lookBits(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_readBitsFast() : -* unsafe version; only works only if nbBits >= 1 */ -ZSTD_STATIC size_t BIT_readBitsFast(BIT_DStream_t *bitD, U32 nbBits) -{ - size_t const value = BIT_lookBitsFast(bitD, nbBits); - BIT_skipBits(bitD, nbBits); - return value; -} - -/*! BIT_reloadDStream() : -* Refill `bitD` from buffer previously set in BIT_initDStream() . -* This function is safe, it guarantees it will not read beyond src buffer. -* @return : status of `BIT_DStream_t` internal register. - if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */ -ZSTD_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t *bitD) -{ - if (bitD->bitsConsumed > (sizeof(bitD->bitContainer) * 8)) /* should not happen => corruption detected */ - return BIT_DStream_overflow; - - if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) { - bitD->ptr -= bitD->bitsConsumed >> 3; - bitD->bitsConsumed &= 7; - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); - return BIT_DStream_unfinished; - } - if (bitD->ptr == bitD->start) { - if (bitD->bitsConsumed < sizeof(bitD->bitContainer) * 8) - return BIT_DStream_endOfBuffer; - return BIT_DStream_completed; - } - { - U32 nbBytes = bitD->bitsConsumed >> 3; - BIT_DStream_status result = BIT_DStream_unfinished; - if (bitD->ptr - nbBytes < bitD->start) { - nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ - result = BIT_DStream_endOfBuffer; - } - bitD->ptr -= nbBytes; - bitD->bitsConsumed -= nbBytes * 8; - bitD->bitContainer = ZSTD_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */ - return result; - } -} - -/*! BIT_endOfDStream() : -* @return Tells if DStream has exactly reached its end (all bits consumed). -*/ -ZSTD_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t *DStream) -{ - return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer) * 8)); -} - -#endif /* BITSTREAM_H_MODULE */ diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h new file mode 100644 index 000000000000..2d6c95b4f40c --- /dev/null +++ b/lib/zstd/common/bitstream.h @@ -0,0 +1,437 @@ +/* ****************************************************************** + * bitstream + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ +#ifndef BITSTREAM_H_MODULE +#define BITSTREAM_H_MODULE + +/* +* This API consists of small unitary functions, which must be inlined for best performance. +* Since link-time-optimization is not available for all compilers, +* these functions are defined into a .h to be included. +*/ + +/*-**************************************** +* Dependencies +******************************************/ +#include "mem.h" /* unaligned access routines */ +#include "compiler.h" /* UNLIKELY() */ +#include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */ +#include "error_private.h" /* error codes and messages */ + + +/*========================================= +* Target specific +=========================================*/ + +#define STREAM_ACCUMULATOR_MIN_32 25 +#define STREAM_ACCUMULATOR_MIN_64 57 +#define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64)) + + +/*-****************************************** +* bitStream encoding API (write forward) +********************************************/ +/* bitStream can mix input from multiple sources. + * A critical property of these streams is that they encode and decode in **reverse** direction. + * So the first bit sequence you add will be the last to be read, like a LIFO stack. + */ +typedef struct { + size_t bitContainer; + unsigned bitPos; + char* startPtr; + char* ptr; + char* endPtr; +} BIT_CStream_t; + +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity); +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC); +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC); + +/* Start with initCStream, providing the size of buffer to write into. +* bitStream will never write outside of this buffer. +* `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code. +* +* bits are first added to a local register. +* Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems. +* Writing data into memory is an explicit operation, performed by the flushBits function. +* Hence keep track how many bits are potentially stored into local register to avoid register overflow. +* After a flushBits, a maximum of 7 bits might still be stored into local register. +* +* Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers. +* +* Last operation is to close the bitStream. +* The function returns the final size of CStream in bytes. +* If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable) +*/ + + +/*-******************************************** +* bitStream decoding API (read backward) +**********************************************/ +typedef struct { + size_t bitContainer; + unsigned bitsConsumed; + const char* ptr; + const char* start; + const char* limitPtr; +} BIT_DStream_t; + +typedef enum { BIT_DStream_unfinished = 0, + BIT_DStream_endOfBuffer = 1, + BIT_DStream_completed = 2, + BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */ + /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */ + +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize); +MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits); +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD); +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD); + + +/* Start by invoking BIT_initDStream(). +* A chunk of the bitStream is then stored into a local register. +* Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +* You can then retrieve bitFields stored into the local register, **in reverse order**. +* Local register is explicitly reloaded from memory by the BIT_reloadDStream() method. +* A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished. +* Otherwise, it can be less than that, so proceed accordingly. +* Checking if DStream has reached its end can be performed with BIT_endOfDStream(). +*/ + + +/*-**************************************** +* unsafe API +******************************************/ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits); +/* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */ + +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC); +/* unsafe version; does not check buffer overflow */ + +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits); +/* faster, but works only if nbBits >= 1 */ + + + +/*-************************************************************** +* Internal functions +****************************************************************/ +MEM_STATIC unsigned BIT_highbit32 (U32 val) +{ + assert(val != 0); + { +# if (__GNUC__ >= 3) /* Use GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# else /* Software version */ + static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, + 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, + 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27]; +# endif + } +} + +/*===== Local Constants =====*/ +static const unsigned BIT_mask[] = { + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, + 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, + 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF, + 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */ +#define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0])) + +/*-************************************************************** +* bitStream encoding +****************************************************************/ +/*! BIT_initCStream() : + * `dstCapacity` must be > sizeof(size_t) + * @return : 0 if success, + * otherwise an error code (can be tested using ERR_isError()) */ +MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, + void* startPtr, size_t dstCapacity) +{ + bitC->bitContainer = 0; + bitC->bitPos = 0; + bitC->startPtr = (char*)startPtr; + bitC->ptr = bitC->startPtr; + bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer); + if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall); + return 0; +} + +/*! BIT_addBits() : + * can add up to 31 bits into `bitC`. + * Note : does not check for register overflow ! */ +MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + DEBUG_STATIC_ASSERT(BIT_MASK_SIZE == 32); + assert(nbBits < BIT_MASK_SIZE); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_addBitsFast() : + * works only if `value` is _clean_, + * meaning all high bits above nbBits are 0 */ +MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, + size_t value, unsigned nbBits) +{ + assert((value>>nbBits) == 0); + assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8); + bitC->bitContainer |= value << bitC->bitPos; + bitC->bitPos += nbBits; +} + +/*! BIT_flushBitsFast() : + * assumption : bitContainer has not overflowed + * unsafe version; does not check buffer overflow */ +MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_flushBits() : + * assumption : bitContainer has not overflowed + * safe version; check for buffer overflow, and prevents it. + * note : does not signal buffer overflow. + * overflow will be revealed later on using BIT_closeCStream() */ +MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC) +{ + size_t const nbBytes = bitC->bitPos >> 3; + assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8); + assert(bitC->ptr <= bitC->endPtr); + MEM_writeLEST(bitC->ptr, bitC->bitContainer); + bitC->ptr += nbBytes; + if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr; + bitC->bitPos &= 7; + bitC->bitContainer >>= nbBytes*8; +} + +/*! BIT_closeCStream() : + * @return : size of CStream, in bytes, + * or 0 if it could not fit into dstBuffer */ +MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC) +{ + BIT_addBitsFast(bitC, 1, 1); /* endMark */ + BIT_flushBits(bitC); + if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */ + return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0); +} + + +/*-******************************************************** +* bitStream decoding +**********************************************************/ +/*! BIT_initDStream() : + * Initialize a BIT_DStream_t. + * `bitD` : a pointer to an already allocated BIT_DStream_t structure. + * `srcSize` must be the *exact* size of the bitStream, in bytes. + * @return : size of stream (== srcSize), or an errorCode if a problem is detected + */ +MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize) +{ + if (srcSize < 1) { ZSTD_memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); } + + bitD->start = (const char*)srcBuffer; + bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer); + + if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */ + bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer); + bitD->bitContainer = MEM_readLEST(bitD->ptr); + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */ + if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ } + } else { + bitD->ptr = bitD->start; + bitD->bitContainer = *(const BYTE*)(bitD->start); + switch(srcSize) + { + case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16); + /* fall-through */ + + case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24); + /* fall-through */ + + case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32); + /* fall-through */ + + case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24; + /* fall-through */ + + case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16; + /* fall-through */ + + case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8; + /* fall-through */ + + default: break; + } + { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1]; + bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; + if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */ + } + bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8; + } + + return srcSize; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start) +{ + return bitContainer >> start; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits) +{ + U32 const regMask = sizeof(bitContainer)*8 - 1; + /* if start > regMask, bitstream is corrupted, and result is undefined */ + assert(nbBits < BIT_MASK_SIZE); + return (bitContainer >> (start & regMask)) & BIT_mask[nbBits]; +} + +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits) +{ + assert(nbBits < BIT_MASK_SIZE); + return bitContainer & BIT_mask[nbBits]; +} + +/*! BIT_lookBits() : + * Provides next n bits from local register. + * local register is not modified. + * On 32-bits, maxNbBits==24. + * On 64-bits, maxNbBits==56. + * @return : value extracted */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits) +{ + /* arbitrate between double-shift and shift+mask */ +#if 1 + /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8, + * bitstream is likely corrupted, and result is undefined */ + return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits); +#else + /* this code path is slower on my os-x laptop */ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask); +#endif +} + +/*! BIT_lookBitsFast() : + * unsafe version; only works if nbBits >= 1 */ +MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits) +{ + U32 const regMask = sizeof(bitD->bitContainer)*8 - 1; + assert(nbBits >= 1); + return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask); +} + +MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits) +{ + bitD->bitsConsumed += nbBits; +} + +/*! BIT_readBits() : + * Read (consume) next n bits from local register and update. + * Pay attention to not read more than nbBits contained into local register. + * @return : extracted value. */ +MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBits(bitD, nbBits); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_readBitsFast() : + * unsafe version; only works only if nbBits >= 1 */ +MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits) +{ + size_t const value = BIT_lookBitsFast(bitD, nbBits); + assert(nbBits >= 1); + BIT_skipBits(bitD, nbBits); + return value; +} + +/*! BIT_reloadDStreamFast() : + * Similar to BIT_reloadDStream(), but with two differences: + * 1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold! + * 2. Returns BIT_DStream_overflow when bitD->ptr < bitD->limitPtr, at this + * point you must use BIT_reloadDStream() to reload. + */ +MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD) +{ + if (UNLIKELY(bitD->ptr < bitD->limitPtr)) + return BIT_DStream_overflow; + assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8); + bitD->ptr -= bitD->bitsConsumed >> 3; + bitD->bitsConsumed &= 7; + bitD->bitContainer = MEM_readLEST(bitD->ptr); + return BIT_DStream_unfinished; +} + +/*! BIT_reloadDStream() : + * Refill `bitD` from buffer previously set in BIT_initDStream() . + * This function is safe, it guarantees it will not read beyond src buffer. + * @return : status of `BIT_DStream_t` internal register. + * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */ +MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD) +{ + if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */ + return BIT_DStream_overflow; + + if (bitD->ptr >= bitD->limitPtr) { + return BIT_reloadDStreamFast(bitD); + } + if (bitD->ptr == bitD->start) { + if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer; + return BIT_DStream_completed; + } + /* start < ptr < limitPtr */ + { U32 nbBytes = bitD->bitsConsumed >> 3; + BIT_DStream_status result = BIT_DStream_unfinished; + if (bitD->ptr - nbBytes < bitD->start) { + nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */ + result = BIT_DStream_endOfBuffer; + } + bitD->ptr -= nbBytes; + bitD->bitsConsumed -= nbBytes*8; + bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */ + return result; + } +} + +/*! BIT_endOfDStream() : + * @return : 1 if DStream has _exactly_ reached its end (all bits consumed). + */ +MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream) +{ + return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8)); +} + + +#endif /* BITSTREAM_H_MODULE */ diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h new file mode 100644 index 000000000000..9269b58a93e2 --- /dev/null +++ b/lib/zstd/common/compiler.h @@ -0,0 +1,151 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPILER_H +#define ZSTD_COMPILER_H + +/*-******************************************************* +* Compiler specifics +*********************************************************/ +/* force inlining */ + +#if (defined(__GNUC__) && !defined(__STRICT_ANSI__)) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */ +# define INLINE_KEYWORD inline +#else +# define INLINE_KEYWORD +#endif + +#define FORCE_INLINE_ATTR __attribute__((always_inline)) + + +/** + On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC). + This explictly marks such functions as __cdecl so that the code will still compile + if a CC other than __cdecl has been made the default. +*/ +#define WIN_CDECL + +/** + * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant + * parameters. They must be inlined for the compiler to eliminate the constant + * branches. + */ +#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR +/** + * HINT_INLINE is used to help the compiler generate better code. It is *not* + * used for "templates", so it can be tweaked based on the compilers + * performance. + * + * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the + * always_inline attribute. + * + * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline + * attribute. + */ +#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5 +# define HINT_INLINE static INLINE_KEYWORD +#else +# define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR +#endif + +/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */ +#define UNUSED_ATTR __attribute__((unused)) + +/* force no inlining */ +#define FORCE_NOINLINE static __attribute__((__noinline__)) + + +/* target attribute */ +#ifndef __has_attribute + #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ +#endif +#define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) + +/* Enable runtime BMI2 dispatch based on the CPU. + * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default. + */ +#ifndef DYNAMIC_BMI2 + #if ((defined(__clang__) && __has_attribute(__target__)) \ + || (defined(__GNUC__) \ + && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \ + && (defined(__x86_64__) || defined(_M_X86)) \ + && !defined(__BMI2__) + # define DYNAMIC_BMI2 1 + #else + # define DYNAMIC_BMI2 0 + #endif +#endif + +/* prefetch + * can be disabled, by declaring NO_PREFETCH build macro */ +#if ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) ) +# define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */) +# define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */) +#elif defined(__aarch64__) +# define PREFETCH_L1(ptr) __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))) +# define PREFETCH_L2(ptr) __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))) +#else +# define PREFETCH_L1(ptr) (void)(ptr) /* disabled */ +# define PREFETCH_L2(ptr) (void)(ptr) /* disabled */ +#endif /* NO_PREFETCH */ + +#define CACHELINE_SIZE 64 + +#define PREFETCH_AREA(p, s) { \ + const char* const _ptr = (const char*)(p); \ + size_t const _size = (size_t)(s); \ + size_t _pos; \ + for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \ + PREFETCH_L2(_ptr + _pos); \ + } \ +} + +/* vectorization + * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */ +#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) +# if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5) +# define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) +# else +# define DONT_VECTORIZE _Pragma("GCC optimize(\"no-tree-vectorize\")") +# endif +#else +# define DONT_VECTORIZE +#endif + +/* Tell the compiler that a branch is likely or unlikely. + * Only use these macros if it causes the compiler to generate better code. + * If you can remove a LIKELY/UNLIKELY annotation without speed changes in gcc + * and clang, please do. + */ +#define LIKELY(x) (__builtin_expect((x), 1)) +#define UNLIKELY(x) (__builtin_expect((x), 0)) + +/* disable warnings */ + +/*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/ + + +/* compat. with non-clang compilers */ +#ifndef __has_builtin +# define __has_builtin(x) 0 +#endif + +/* compat. with non-clang compilers */ +#ifndef __has_feature +# define __has_feature(x) 0 +#endif + +/* detects whether we are being compiled under msan */ + + +/* detects whether we are being compiled under asan */ + + +#endif /* ZSTD_COMPILER_H */ diff --git a/lib/zstd/common/cpu.h b/lib/zstd/common/cpu.h new file mode 100644 index 000000000000..0202d94076a3 --- /dev/null +++ b/lib/zstd/common/cpu.h @@ -0,0 +1,194 @@ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMMON_CPU_H +#define ZSTD_COMMON_CPU_H + +/** + * Implementation taken from folly/CpuId.h + * https://github.com/facebook/folly/blob/master/folly/CpuId.h + */ + +#include "mem.h" + + +typedef struct { + U32 f1c; + U32 f1d; + U32 f7b; + U32 f7c; +} ZSTD_cpuid_t; + +MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) { + U32 f1c = 0; + U32 f1d = 0; + U32 f7b = 0; + U32 f7c = 0; +#if defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__) + /* The following block like the normal cpuid branch below, but gcc + * reserves ebx for use of its pic register so we must specially + * handle the save and restore to avoid clobbering the register + */ + U32 n; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(n) + : "a"(0) + : "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "popl %%ebx\n\t" + : "=a"(f1a), "=c"(f1c), "=d"(f1d) + : "a"(1)); + } + if (n >= 7) { + __asm__( + "pushl %%ebx\n\t" + "cpuid\n\t" + "movl %%ebx, %%eax\n\t" + "popl %%ebx" + : "=a"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__) + U32 n; + __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx"); + if (n >= 1) { + U32 f1a; + __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx"); + } + if (n >= 7) { + U32 f7a; + __asm__("cpuid" + : "=a"(f7a), "=b"(f7b), "=c"(f7c) + : "a"(7), "c"(0) + : "edx"); + } +#endif + { + ZSTD_cpuid_t cpuid; + cpuid.f1c = f1c; + cpuid.f1d = f1d; + cpuid.f7b = f7b; + cpuid.f7c = f7c; + return cpuid; + } +} + +#define X(name, r, bit) \ + MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) { \ + return ((cpuid.r) & (1U << bit)) != 0; \ + } + +/* cpuid(1): Processor Info and Feature Bits. */ +#define C(name, bit) X(name, f1c, bit) + C(sse3, 0) + C(pclmuldq, 1) + C(dtes64, 2) + C(monitor, 3) + C(dscpl, 4) + C(vmx, 5) + C(smx, 6) + C(eist, 7) + C(tm2, 8) + C(ssse3, 9) + C(cnxtid, 10) + C(fma, 12) + C(cx16, 13) + C(xtpr, 14) + C(pdcm, 15) + C(pcid, 17) + C(dca, 18) + C(sse41, 19) + C(sse42, 20) + C(x2apic, 21) + C(movbe, 22) + C(popcnt, 23) + C(tscdeadline, 24) + C(aes, 25) + C(xsave, 26) + C(osxsave, 27) + C(avx, 28) + C(f16c, 29) + C(rdrand, 30) +#undef C +#define D(name, bit) X(name, f1d, bit) + D(fpu, 0) + D(vme, 1) + D(de, 2) + D(pse, 3) + D(tsc, 4) + D(msr, 5) + D(pae, 6) + D(mce, 7) + D(cx8, 8) + D(apic, 9) + D(sep, 11) + D(mtrr, 12) + D(pge, 13) + D(mca, 14) + D(cmov, 15) + D(pat, 16) + D(pse36, 17) + D(psn, 18) + D(clfsh, 19) + D(ds, 21) + D(acpi, 22) + D(mmx, 23) + D(fxsr, 24) + D(sse, 25) + D(sse2, 26) + D(ss, 27) + D(htt, 28) + D(tm, 29) + D(pbe, 31) +#undef D + +/* cpuid(7): Extended Features. */ +#define B(name, bit) X(name, f7b, bit) + B(bmi1, 3) + B(hle, 4) + B(avx2, 5) + B(smep, 7) + B(bmi2, 8) + B(erms, 9) + B(invpcid, 10) + B(rtm, 11) + B(mpx, 14) + B(avx512f, 16) + B(avx512dq, 17) + B(rdseed, 18) + B(adx, 19) + B(smap, 20) + B(avx512ifma, 21) + B(pcommit, 22) + B(clflushopt, 23) + B(clwb, 24) + B(avx512pf, 26) + B(avx512er, 27) + B(avx512cd, 28) + B(sha, 29) + B(avx512bw, 30) + B(avx512vl, 31) +#undef B +#define C(name, bit) X(name, f7c, bit) + C(prefetchwt1, 0) + C(avx512vbmi, 1) +#undef C + +#undef X + +#endif /* ZSTD_COMMON_CPU_H */ diff --git a/lib/zstd/common/debug.c b/lib/zstd/common/debug.c new file mode 100644 index 000000000000..bb863c9ea616 --- /dev/null +++ b/lib/zstd/common/debug.c @@ -0,0 +1,24 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * This module only hosts one global variable + * which can be used to dynamically influence the verbosity of traces, + * such as DEBUGLOG and RAWLOG + */ + +#include "debug.h" + +int g_debuglevel = DEBUGLEVEL; diff --git a/lib/zstd/common/debug.h b/lib/zstd/common/debug.h new file mode 100644 index 000000000000..6dd88d1fbd02 --- /dev/null +++ b/lib/zstd/common/debug.h @@ -0,0 +1,101 @@ +/* ****************************************************************** + * debug + * Part of FSE library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* + * The purpose of this header is to enable debug functions. + * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time, + * and DEBUG_STATIC_ASSERT() for compile-time. + * + * By default, DEBUGLEVEL==0, which means run-time debug is disabled. + * + * Level 1 enables assert() only. + * Starting level 2, traces can be generated and pushed to stderr. + * The higher the level, the more verbose the traces. + * + * It's possible to dynamically adjust level using variable g_debug_level, + * which is only declared if DEBUGLEVEL>=2, + * and is a global variable, not multi-thread protected (use with care) + */ + +#ifndef DEBUG_H_12987983217 +#define DEBUG_H_12987983217 + + + +/* static assert is triggered at compile time, leaving no runtime artefact. + * static assert only works with compile-time constants. + * Also, this variant can only be used inside a function. */ +#define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1]) + + +/* DEBUGLEVEL is expected to be defined externally, + * typically through compiler command line. + * Value must be a number. */ +#ifndef DEBUGLEVEL +# define DEBUGLEVEL 0 +#endif + + +/* recommended values for DEBUGLEVEL : + * 0 : release mode, no debug, all run-time checks disabled + * 1 : enables assert() only, no display + * 2 : reserved, for currently active debug path + * 3 : events once per object lifetime (CCtx, CDict, etc.) + * 4 : events once per frame + * 5 : events once per block + * 6 : events once per sequence (verbose) + * 7+: events at every position (*very* verbose) + * + * It's generally inconvenient to output traces > 5. + * In which case, it's possible to selectively trigger high verbosity levels + * by modifying g_debug_level. + */ + +#if (DEBUGLEVEL>=1) +# define ZSTD_DEPS_NEED_ASSERT +# include "zstd_deps.h" +#else +# ifndef assert /* assert may be already defined, due to prior #include */ +# define assert(condition) ((void)0) /* disable assert (default) */ +# endif +#endif + +#if (DEBUGLEVEL>=2) +# define ZSTD_DEPS_NEED_IO +# include "zstd_deps.h" +extern int g_debuglevel; /* the variable is only declared, + it actually lives in debug.c, + and is shared by the whole process. + It's not thread-safe. + It's useful when enabling very verbose levels + on selective conditions (such as position in src) */ + +# define RAWLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__VA_ARGS__); \ + } } +# define DEBUGLOG(l, ...) { \ + if (l<=g_debuglevel) { \ + ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \ + ZSTD_DEBUG_PRINT(" \n"); \ + } } +#else +# define RAWLOG(l, ...) {} /* disabled */ +# define DEBUGLOG(l, ...) {} /* disabled */ +#endif + + + +#endif /* DEBUG_H_12987983217 */ diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c new file mode 100644 index 000000000000..53b47a2b52ff --- /dev/null +++ b/lib/zstd/common/entropy_common.c @@ -0,0 +1,357 @@ +/* ****************************************************************** + * Common functions of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************* +* Dependencies +***************************************/ +#include "mem.h" +#include "error_private.h" /* ERR_*, ERROR */ +#define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */ +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */ +#include "huf.h" + + +/*=== Version ===*/ +unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } + + +/*=== Error Management ===*/ +unsigned FSE_isError(size_t code) { return ERR_isError(code); } +const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); } + +unsigned HUF_isError(size_t code) { return ERR_isError(code); } +const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); } + + +/*-************************************************************** +* FSE NCount encoding-decoding +****************************************************************/ +static U32 FSE_ctz(U32 val) +{ + assert(val != 0); + { +# if (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_ctz(val); +# else /* Software version */ + U32 count = 0; + while ((val & 1) == 0) { + val >>= 1; + ++count; + } + return count; +# endif + } +} + +FORCE_INLINE_TEMPLATE +size_t FSE_readNCount_body(short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + const BYTE* const istart = (const BYTE*) headerBuffer; + const BYTE* const iend = istart + hbSize; + const BYTE* ip = istart; + int nbBits; + int remaining; + int threshold; + U32 bitStream; + int bitCount; + unsigned charnum = 0; + unsigned const maxSV1 = *maxSVPtr + 1; + int previous0 = 0; + + if (hbSize < 8) { + /* This function only works when hbSize >= 8 */ + char buffer[8] = {0}; + ZSTD_memcpy(buffer, headerBuffer, hbSize); + { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr, + buffer, sizeof(buffer)); + if (FSE_isError(countSize)) return countSize; + if (countSize > hbSize) return ERROR(corruption_detected); + return countSize; + } } + assert(hbSize >= 8); + + /* init */ + ZSTD_memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */ + bitStream = MEM_readLE32(ip); + nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ + if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge); + bitStream >>= 4; + bitCount = 4; + *tableLogPtr = nbBits; + remaining = (1<> 1; + while (repeats >= 12) { + charnum += 3 * 12; + if (LIKELY(ip <= iend-7)) { + ip += 3; + } else { + bitCount -= (int)(8 * (iend - 7 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + repeats = FSE_ctz(~bitStream | 0x80000000) >> 1; + } + charnum += 3 * repeats; + bitStream >>= 2 * repeats; + bitCount += 2 * repeats; + + /* Add the final repeat which isn't 0b11. */ + assert((bitStream & 3) < 3); + charnum += bitStream & 3; + bitCount += 2; + + /* This is an error, but break and return an error + * at the end, because returning out of a loop makes + * it harder for the compiler to optimize. + */ + if (charnum >= maxSV1) break; + + /* We don't need to set the normalized count to 0 + * because we already memset the whole buffer to 0. + */ + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + assert((bitCount >> 3) <= 3); /* For first condition to work */ + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } + { + int const max = (2*threshold-1) - remaining; + int count; + + if ((bitStream & (threshold-1)) < (U32)max) { + count = bitStream & (threshold-1); + bitCount += nbBits-1; + } else { + count = bitStream & (2*threshold-1); + if (count >= threshold) count -= max; + bitCount += nbBits; + } + + count--; /* extra accuracy */ + /* When it matters (small blocks), this is a + * predictable branch, because we don't use -1. + */ + if (count >= 0) { + remaining -= count; + } else { + assert(count == -1); + remaining += count; + } + normalizedCounter[charnum++] = (short)count; + previous0 = !count; + + assert(threshold > 1); + if (remaining < threshold) { + /* This branch can be folded into the + * threshold update condition because we + * know that threshold > 1. + */ + if (remaining <= 1) break; + nbBits = BIT_highbit32(remaining) + 1; + threshold = 1 << (nbBits - 1); + } + if (charnum >= maxSV1) break; + + if (LIKELY(ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) { + ip += bitCount>>3; + bitCount &= 7; + } else { + bitCount -= (int)(8 * (iend - 4 - ip)); + bitCount &= 31; + ip = iend - 4; + } + bitStream = MEM_readLE32(ip) >> bitCount; + } } + if (remaining != 1) return ERROR(corruption_detected); + /* Only possible when there are too many zeros. */ + if (charnum > maxSV1) return ERROR(maxSymbolValue_tooSmall); + if (bitCount > 32) return ERROR(corruption_detected); + *maxSVPtr = charnum-1; + + ip += (bitCount+7)>>3; + return ip-istart; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_readNCount_body_default( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_body(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} +#endif + +size_t FSE_readNCount_bmi2( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_readNCount_body_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); + } +#endif + (void)bmi2; + return FSE_readNCount_body_default(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize); +} + +size_t FSE_readNCount( + short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr, + const void* headerBuffer, size_t hbSize) +{ + return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0); +} + + +/*! HUF_readStats() : + Read compact Huffman tree, saved by HUF_writeCTable(). + `huffWeight` is destination buffer. + `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. + @return : size read from `src` , or an error Code . + Note : Needed by HUF_readCTable() and HUF_readDTableX?() . +*/ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize) +{ + U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0); +} + +FORCE_INLINE_TEMPLATE size_t +HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ + U32 weightTotal; + const BYTE* ip = (const BYTE*) src; + size_t iSize; + size_t oSize; + + if (!srcSize) return ERROR(srcSize_wrong); + iSize = ip[0]; + /* ZSTD_memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */ + + if (iSize >= 128) { /* special header */ + oSize = iSize - 127; + iSize = ((oSize+1)/2); + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + if (oSize >= hwSize) return ERROR(corruption_detected); + ip += 1; + { U32 n; + for (n=0; n> 4; + huffWeight[n+1] = ip[n/2] & 15; + } } } + else { /* header compressed with FSE (normal case) */ + if (iSize+1 > srcSize) return ERROR(srcSize_wrong); + /* max (hwSize-1) values decoded, as last one is implied */ + oSize = FSE_decompress_wksp_bmi2(huffWeight, hwSize-1, ip+1, iSize, 6, workSpace, wkspSize, bmi2); + if (FSE_isError(oSize)) return oSize; + } + + /* collect weight stats */ + ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); + weightTotal = 0; + { U32 n; for (n=0; n= HUF_TABLELOG_MAX) return ERROR(corruption_detected); + rankStats[huffWeight[n]]++; + weightTotal += (1 << huffWeight[n]) >> 1; + } } + if (weightTotal == 0) return ERROR(corruption_detected); + + /* get last non-null symbol weight (implied, total must be 2^n) */ + { U32 const tableLog = BIT_highbit32(weightTotal) + 1; + if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected); + *tableLogPtr = tableLog; + /* determine last weight */ + { U32 const total = 1 << tableLog; + U32 const rest = total - weightTotal; + U32 const verif = 1 << BIT_highbit32(rest); + U32 const lastWeight = BIT_highbit32(rest) + 1; + if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */ + huffWeight[oSize] = (BYTE)lastWeight; + rankStats[lastWeight]++; + } } + + /* check tree construction validity */ + if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ + + /* results */ + *nbSymbolsPtr = (U32)(oSize+1); + return iSize+1; +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_readStats_body(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize, 1); +} +#endif + +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats, + U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize, + int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return HUF_readStats_body_bmi2(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); + } +#endif + (void)bmi2; + return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize); +} diff --git a/lib/zstd/common/error_private.c b/lib/zstd/common/error_private.c new file mode 100644 index 000000000000..6d1135f8c373 --- /dev/null +++ b/lib/zstd/common/error_private.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* The purpose of this file is to have a single list of error strings embedded in binary */ + +#include "error_private.h" + +const char* ERR_getErrorString(ERR_enum code) +{ +#ifdef ZSTD_STRIP_ERROR_STRINGS + (void)code; + return "Error strings stripped"; +#else + static const char* const notErrorCode = "Unspecified error code"; + switch( code ) + { + case PREFIX(no_error): return "No error detected"; + case PREFIX(GENERIC): return "Error (generic)"; + case PREFIX(prefix_unknown): return "Unknown frame descriptor"; + case PREFIX(version_unsupported): return "Version not supported"; + case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter"; + case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding"; + case PREFIX(corruption_detected): return "Corrupted block detected"; + case PREFIX(checksum_wrong): return "Restored data doesn't match checksum"; + case PREFIX(parameter_unsupported): return "Unsupported parameter"; + case PREFIX(parameter_outOfBound): return "Parameter is out of bound"; + case PREFIX(init_missing): return "Context should be init first"; + case PREFIX(memory_allocation): return "Allocation error : not enough memory"; + case PREFIX(workSpace_tooSmall): return "workSpace buffer is not large enough"; + case PREFIX(stage_wrong): return "Operation not authorized at current processing stage"; + case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported"; + case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large"; + case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small"; + case PREFIX(dictionary_corrupted): return "Dictionary is corrupted"; + case PREFIX(dictionary_wrong): return "Dictionary mismatch"; + case PREFIX(dictionaryCreation_failed): return "Cannot create Dictionary from provided samples"; + case PREFIX(dstSize_tooSmall): return "Destination buffer is too small"; + case PREFIX(srcSize_wrong): return "Src size is incorrect"; + case PREFIX(dstBuffer_null): return "Operation on NULL destination buffer"; + /* following error codes are not stable and may be removed or changed in a future version */ + case PREFIX(frameIndex_tooLarge): return "Frame index is too large"; + case PREFIX(seekableIO): return "An I/O error occurred when reading/seeking"; + case PREFIX(dstBuffer_wrong): return "Destination buffer is wrong"; + case PREFIX(srcBuffer_wrong): return "Source buffer is wrong"; + case PREFIX(maxCode): + default: return notErrorCode; + } +#endif +} diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h new file mode 100644 index 000000000000..d14e686adf95 --- /dev/null +++ b/lib/zstd/common/error_private.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* Note : this module is expected to remain private, do not expose it */ + +#ifndef ERROR_H_MODULE +#define ERROR_H_MODULE + + + +/* **************************************** +* Dependencies +******************************************/ +#include "zstd_deps.h" /* size_t */ +#include /* enum list */ + + +/* **************************************** +* Compiler-specific +******************************************/ +#define ERR_STATIC static __attribute__((unused)) + + +/*-**************************************** +* Customization (error_public.h) +******************************************/ +typedef ZSTD_ErrorCode ERR_enum; +#define PREFIX(name) ZSTD_error_##name + + +/*-**************************************** +* Error codes handling +******************************************/ +#undef ERROR /* already defined on Visual Studio */ +#define ERROR(name) ZSTD_ERROR(name) +#define ZSTD_ERROR(name) ((size_t)-PREFIX(name)) + +ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } + +ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); } + +/* check and forward error code */ +#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e +#define CHECK_F(f) { CHECK_V_F(_var_err__, f); } + + +/*-**************************************** +* Error Strings +******************************************/ + +const char* ERR_getErrorString(ERR_enum code); /* error_private.c */ + +ERR_STATIC const char* ERR_getErrorName(size_t code) +{ + return ERR_getErrorString(ERR_getErrorCode(code)); +} + + +#endif /* ERROR_H_MODULE */ diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h new file mode 100644 index 000000000000..9e3f09ed1dfd --- /dev/null +++ b/lib/zstd/common/fse.h @@ -0,0 +1,710 @@ +/* ****************************************************************** + * FSE : Finite State Entropy codec + * Public Prototypes declaration + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +#ifndef FSE_H +#define FSE_H + + +/*-***************************************** +* Dependencies +******************************************/ +#include "zstd_deps.h" /* size_t, ptrdiff_t */ + + +/*-***************************************** +* FSE_PUBLIC_API : control library symbols visibility +******************************************/ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define FSE_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define FSE_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ +#else +# define FSE_PUBLIC_API +#endif + +/*------ Version ------*/ +#define FSE_VERSION_MAJOR 0 +#define FSE_VERSION_MINOR 9 +#define FSE_VERSION_RELEASE 0 + +#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE +#define FSE_QUOTE(str) #str +#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) +#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) + +#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE) +FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ + + +/*-**************************************** +* FSE simple functions +******************************************/ +/*! FSE_compress() : + Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'. + 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize). + @return : size of compressed data (<= dstCapacity). + Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead. + if FSE_isError(return), compression failed (more details using FSE_getErrorName()) +*/ +FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/*! FSE_decompress(): + Decompress FSE data from buffer 'cSrc', of size 'cSrcSize', + into already allocated destination buffer 'dst', of size 'dstCapacity'. + @return : size of regenerated data (<= maxDstSize), + or an error code, which can be tested using FSE_isError() . + + ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!! + Why ? : making this distinction requires a header. + Header management is intentionally delegated to the user layer, which can better manage special cases. +*/ +FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize); + + +/*-***************************************** +* Tool functions +******************************************/ +FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ + +/* Error Management */ +FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ +FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */ + + +/*-***************************************** +* FSE advanced functions +******************************************/ +/*! FSE_compress2() : + Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog' + Both parameters can be defined as '0' to mean : use default value + @return : size of compressed data + Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!! + if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression. + if FSE_isError(return), it's an error code. +*/ +FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); + + +/*-***************************************** +* FSE detailed API +******************************************/ +/*! +FSE_compress() does the following: +1. count symbol occurrence from source[] into table count[] (see hist.h) +2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) +3. save normalized counters to memory buffer using writeNCount() +4. build encoding table 'CTable' from normalized counters +5. encode the data stream using encoding table 'CTable' + +FSE_decompress() does the following: +1. read normalized counters with readNCount() +2. build decoding table 'DTable' from normalized counters +3. decode the data stream using decoding table 'DTable' + +The following API allows targeting specific sub-functions for advanced tasks. +For example, it's possible to compress several blocks using the same 'CTable', +or to save and provide normalized distribution using external method. +*/ + +/* *** COMPRESSION *** */ + +/*! FSE_optimalTableLog(): + dynamically downsize 'tableLog' when conditions are met. + It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. + @return : recommended tableLog (necessarily <= 'maxTableLog') */ +FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); + +/*! FSE_normalizeCount(): + normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) + 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). + useLowProbCount is a boolean parameter which trades off compressed size for + faster header decoding. When it is set to 1, the compressed data will be slightly + smaller. And when it is set to 0, FSE_readNCount() and FSE_buildDTable() will be + faster. If you are compressing a small amount of data (< 2 KB) then useLowProbCount=0 + is a good default, since header deserialization makes a big speed difference. + Otherwise, useLowProbCount=1 is a good default, since the speed difference is small. + @return : tableLog, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t srcSize, unsigned maxSymbolValue, unsigned useLowProbCount); + +/*! FSE_NCountWriteBound(): + Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. + Typically useful for allocation purpose. */ +FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_writeNCount(): + Compactly save 'normalizedCounter' into 'buffer'. + @return : size of the compressed table, + or an errorCode, which can be tested using FSE_isError(). */ +FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, + unsigned maxSymbolValue, unsigned tableLog); + +/*! Constructor and Destructor of FSE_CTable. + Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ +typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ +FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog); +FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct); + +/*! FSE_buildCTable(): + Builds `ct`, which must be already allocated, using FSE_createCTable(). + @return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_compress_usingCTable(): + Compress `src` using `ct` into `dst` which must be already allocated. + @return : size of compressed data (<= `dstCapacity`), + or 0 if compressed data could not fit into `dst`, + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct); + +/*! +Tutorial : +---------- +The first step is to count all symbols. FSE_count() does this job very fast. +Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. +'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] +maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) +FSE_count() will return the number of occurrence of the most frequent symbol. +This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). + +The next step is to normalize the frequencies. +FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. +It also guarantees a minimum of 1 to any Symbol with frequency >= 1. +You can use 'tableLog'==0 to mean "use default tableLog value". +If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), +which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). + +The result of FSE_normalizeCount() will be saved into a table, +called 'normalizedCounter', which is a table of signed short. +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. +The return value is tableLog if everything proceeded as expected. +It is 0 if there is a single symbol within distribution. +If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). + +'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). +'buffer' must be already allocated. +For guaranteed success, buffer size must be at least FSE_headerBound(). +The result of the function is the number of bytes written into 'buffer'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). + +'normalizedCounter' can then be used to create the compression table 'CTable'. +The space required by 'CTable' must be already allocated, using FSE_createCTable(). +You can then use FSE_buildCTable() to fill 'CTable'. +If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). + +'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). +Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' +The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. +If it returns '0', compressed data could not fit into 'dst'. +If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). +*/ + + +/* *** DECOMPRESSION *** */ + +/*! FSE_readNCount(): + Read compactly saved 'normalizedCounter' from 'rBuffer'. + @return : size read from 'rBuffer', + or an errorCode, which can be tested using FSE_isError(). + maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ +FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize); + +/*! FSE_readNCount_bmi2(): + * Same as FSE_readNCount() but pass bmi2=1 when your CPU supports BMI2 and 0 otherwise. + */ +FSE_PUBLIC_API size_t FSE_readNCount_bmi2(short* normalizedCounter, + unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, + const void* rBuffer, size_t rBuffSize, int bmi2); + +/*! Constructor and Destructor of FSE_DTable. + Note that its size depends on 'tableLog' */ +typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ +FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog); +FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt); + +/*! FSE_buildDTable(): + Builds 'dt', which must be already allocated, using FSE_createDTable(). + return : 0, or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); + +/*! FSE_decompress_usingDTable(): + Decompress compressed source `cSrc` of size `cSrcSize` using `dt` + into `dst` which must be already allocated. + @return : size of regenerated data (necessarily <= `dstCapacity`), + or an errorCode, which can be tested using FSE_isError() */ +FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt); + +/*! +Tutorial : +---------- +(Note : these functions only decompress FSE-compressed blocks. + If block is uncompressed, use memcpy() instead + If block is a single repeated byte, use memset() instead ) + +The first step is to obtain the normalized frequencies of symbols. +This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). +'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. +In practice, that means it's necessary to know 'maxSymbolValue' beforehand, +or size the table to handle worst case situations (typically 256). +FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. +The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. +Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. +This is performed by the function FSE_buildDTable(). +The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). + +`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). +`cSrcSize` must be strictly correct, otherwise decompression will fail. +FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). +If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) +*/ + +#endif /* FSE_H */ + +#if !defined(FSE_H_FSE_STATIC_LINKING_ONLY) +#define FSE_H_FSE_STATIC_LINKING_ONLY + +/* *** Dependency *** */ +#include "bitstream.h" + + +/* ***************************************** +* Static allocation +*******************************************/ +/* FSE buffer bounds */ +#define FSE_NCOUNTBOUND 512 +#define FSE_BLOCKBOUND(size) ((size) + ((size)>>7) + 4 /* fse states */ + sizeof(size_t) /* bitContainer */) +#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ +#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<((maxTableLog)-1)) + (((maxSymbolValue)+1)*2)) +#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<(maxTableLog))) + +/* or use the size to malloc() space directly. Pay attention to alignment restrictions though */ +#define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable)) +#define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable)) + + +/* ***************************************** + * FSE advanced API + ***************************************** */ + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); +/**< same as FSE_optimalTableLog(), which used `minus==2` */ + +/* FSE_compress_wksp() : + * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). + * FSE_COMPRESS_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable. + */ +#define FSE_COMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) ) +size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits); +/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ + +size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue); +/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`. + */ +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2))) +#define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)) +size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); + +#define FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) (sizeof(short) * (maxSymbolValue + 1) + (1ULL << maxTableLog) + 8) +#define FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ((FSE_BUILD_DTABLE_WKSP_SIZE(maxTableLog, maxSymbolValue) + sizeof(unsigned) - 1) / sizeof(unsigned)) +FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); +/**< Same as FSE_buildDTable(), using an externally allocated `workspace` produced with `FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxSymbolValue)` */ + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits); +/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */ + +size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue); +/**< build a fake FSE_DTable, designed to always generate the same symbolValue */ + +#define FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) (FSE_DTABLE_SIZE_U32(maxTableLog) + FSE_BUILD_DTABLE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) + (FSE_MAX_SYMBOL_VALUE + 1) / 2 + 1) +#define FSE_DECOMPRESS_WKSP_SIZE(maxTableLog, maxSymbolValue) (FSE_DECOMPRESS_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(unsigned)) +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize); +/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DECOMPRESS_WKSP_SIZE_U32(maxLog, maxSymbolValue)` */ + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2); +/**< Same as FSE_decompress_wksp() but with dynamic BMI2 support. Pass 1 if your CPU supports BMI2 or 0 if it doesn't. */ + +typedef enum { + FSE_repeat_none, /**< Cannot use the previous table */ + FSE_repeat_check, /**< Can use the previous table but it must be checked */ + FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } FSE_repeat; + +/* ***************************************** +* FSE symbol compression API +*******************************************/ +/*! + This API consists of small unitary functions, which highly benefit from being inlined. + Hence their body are included in next section. +*/ +typedef struct { + ptrdiff_t value; + const void* stateTable; + const void* symbolTT; + unsigned stateLog; +} FSE_CState_t; + +static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct); + +static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol); + +static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr); + +/**< +These functions are inner components of FSE_compress_usingCTable(). +They allow the creation of custom streams, mixing multiple tables and bit sources. + +A key property to keep in mind is that encoding and decoding are done **in reverse direction**. +So the first symbol you will encode is the last you will decode, like a LIFO stack. + +You will need a few variables to track your CStream. They are : + +FSE_CTable ct; // Provided by FSE_buildCTable() +BIT_CStream_t bitStream; // bitStream tracking structure +FSE_CState_t state; // State tracking structure (can have several) + + +The first thing to do is to init bitStream and state. + size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize); + FSE_initCState(&state, ct); + +Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError(); +You can then encode your input data, byte after byte. +FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time. +Remember decoding will be done in reverse direction. + FSE_encodeByte(&bitStream, &state, symbol); + +At any time, you can also add any bit sequence. +Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders + BIT_addBits(&bitStream, bitField, nbBits); + +The above methods don't commit data to memory, they just store it into local register, for speed. +Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t). +Writing data to memory is a manual operation, performed by the flushBits function. + BIT_flushBits(&bitStream); + +Your last FSE encoding operation shall be to flush your last state value(s). + FSE_flushState(&bitStream, &state); + +Finally, you must close the bitStream. +The function returns the size of CStream in bytes. +If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible) +If there is an error, it returns an errorCode (which can be tested using FSE_isError()). + size_t size = BIT_closeCStream(&bitStream); +*/ + + +/* ***************************************** +* FSE symbol decompression API +*******************************************/ +typedef struct { + size_t state; + const void* table; /* precise table may vary, depending on U16 */ +} FSE_DState_t; + + +static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt); + +static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); + +static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr); + +/**< +Let's now decompose FSE_decompress_usingDTable() into its unitary components. +You will decode FSE-encoded symbols from the bitStream, +and also any other bitFields you put in, **in reverse order**. + +You will need a few variables to track your bitStream. They are : + +BIT_DStream_t DStream; // Stream context +FSE_DState_t DState; // State context. Multiple ones are possible +FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable() + +The first thing to do is to init the bitStream. + errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize); + +You should then retrieve your initial state(s) +(in reverse flushing order if you have several ones) : + errorCode = FSE_initDState(&DState, &DStream, DTablePtr); + +You can then decode your data, symbol after symbol. +For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'. +Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out). + unsigned char symbol = FSE_decodeSymbol(&DState, &DStream); + +You can retrieve any bitfield you eventually stored into the bitStream (in reverse order) +Note : maximum allowed nbBits is 25, for 32-bits compatibility + size_t bitField = BIT_readBits(&DStream, nbBits); + +All above operations only read from local register (which size depends on size_t). +Refueling the register from memory is manually performed by the reload method. + endSignal = FSE_reloadDStream(&DStream); + +BIT_reloadDStream() result tells if there is still some more data to read from DStream. +BIT_DStream_unfinished : there is still some data left into the DStream. +BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled. +BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed. +BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted. + +When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop, +to properly detect the exact end of stream. +After each decoded symbol, check if DStream is fully consumed using this simple test : + BIT_reloadDStream(&DStream) >= BIT_DStream_completed + +When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. +Checking if DStream has reached its end is performed by : + BIT_endOfDStream(&DStream); +Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. + FSE_endOfDState(&DState); +*/ + + +/* ***************************************** +* FSE unsafe API +*******************************************/ +static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD); +/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ + + +/* ***************************************** +* Implementation of inlined functions +*******************************************/ +typedef struct { + int deltaFindState; + U32 deltaNbBits; +} FSE_symbolCompressionTransform; /* total 8 bytes */ + +MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct) +{ + const void* ptr = ct; + const U16* u16ptr = (const U16*) ptr; + const U32 tableLog = MEM_read16(ptr); + statePtr->value = (ptrdiff_t)1<stateTable = u16ptr+2; + statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1); + statePtr->stateLog = tableLog; +} + + +/*! FSE_initCState2() : +* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) +* uses the smallest state value possible, saving the cost of this symbol */ +MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol) +{ + FSE_initCState(statePtr, ct); + { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* stateTable = (const U16*)(statePtr->stateTable); + U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16); + statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; + statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; + } +} + +MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol) +{ + FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol]; + const U16* const stateTable = (const U16*)(statePtr->stateTable); + U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); + BIT_addBits(bitC, statePtr->value, nbBitsOut); + statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; +} + +MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr) +{ + BIT_addBits(bitC, statePtr->value, statePtr->stateLog); + BIT_flushBits(bitC); +} + + +/* FSE_getMaxNbBits() : + * Approximate maximum cost of a symbol, in bits. + * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16; +} + +/* FSE_bitCost() : + * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) + * note 1 : assume symbolValue is valid (<= maxSymbolValue) + * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */ +MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog) +{ + const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr; + U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16; + U32 const threshold = (minNbBits+1) << 16; + assert(tableLog < 16); + assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */ + { U32 const tableSize = 1 << tableLog; + U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize); + U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */ + U32 const bitMultiplier = 1 << accuracyLog; + assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold); + assert(normalizedDeltaFromThreshold <= bitMultiplier); + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold; + } +} + + +/* ====== Decompression ====== */ + +typedef struct { + U16 tableLog; + U16 fastMode; +} FSE_DTableHeader; /* sizeof U32 */ + +typedef struct +{ + unsigned short newState; + unsigned char symbol; + unsigned char nbBits; +} FSE_decode_t; /* size == U32 */ + +MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + return DInfo.symbol; +} + +MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.newState + lowBits; +} + +MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBits(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +/*! FSE_decodeSymbolFast() : + unsafe, only works if no symbol has a probability > 50% */ +MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD) +{ + FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + BYTE const symbol = DInfo.symbol; + size_t const lowBits = BIT_readBitsFast(bitD, nbBits); + + DStatePtr->state = DInfo.newState + lowBits; + return symbol; +} + +MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr) +{ + return DStatePtr->state == 0; +} + + + +#ifndef FSE_COMMONDEFS_ONLY + +/* ************************************************************** +* Tuning parameters +****************************************************************/ +/*!MEMORY_USAGE : +* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +* Increasing memory usage improves compression ratio +* Reduced memory usage can improve speed, due to cache effect +* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ +#ifndef FSE_MAX_MEMORY_USAGE +# define FSE_MAX_MEMORY_USAGE 14 +#endif +#ifndef FSE_DEFAULT_MEMORY_USAGE +# define FSE_DEFAULT_MEMORY_USAGE 13 +#endif +#if (FSE_DEFAULT_MEMORY_USAGE > FSE_MAX_MEMORY_USAGE) +# error "FSE_DEFAULT_MEMORY_USAGE must be <= FSE_MAX_MEMORY_USAGE" +#endif + +/*!FSE_MAX_SYMBOL_VALUE : +* Maximum symbol value authorized. +* Required for proper stack allocation */ +#ifndef FSE_MAX_SYMBOL_VALUE +# define FSE_MAX_SYMBOL_VALUE 255 +#endif + +/* ************************************************************** +* template functions type & suffix +****************************************************************/ +#define FSE_FUNCTION_TYPE BYTE +#define FSE_FUNCTION_EXTENSION +#define FSE_DECODE_TYPE FSE_decode_t + + +#endif /* !FSE_COMMONDEFS_ONLY */ + + +/* *************************************************************** +* Constants +*****************************************************************/ +#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2) +#define FSE_MAX_TABLESIZE (1U< FSE_TABLELOG_ABSOLUTE_MAX +# error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" +#endif + +#define FSE_TABLESTEP(tableSize) (((tableSize)>>1) + ((tableSize)>>3) + 3) + + +#endif /* FSE_STATIC_LINKING_ONLY */ + + diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c new file mode 100644 index 000000000000..2c8bbe3e4c14 --- /dev/null +++ b/lib/zstd/common/fse_decompress.c @@ -0,0 +1,390 @@ +/* ****************************************************************** + * FSE : Finite State Entropy decoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "debug.h" /* assert */ +#include "bitstream.h" +#include "compiler.h" +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#include "error_private.h" +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError +#define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ +FSE_DTable* FSE_createDTable (unsigned tableLog) +{ + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + return (FSE_DTable*)ZSTD_malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) ); +} + +void FSE_freeDTable (FSE_DTable* dt) +{ + ZSTD_free(dt); +} + +static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) +{ + void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ + FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr); + U16* symbolNext = (U16*)workSpace; + BYTE* spread = (BYTE*)(symbolNext + maxSymbolValue + 1); + + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + U32 highThreshold = tableSize-1; + + /* Sanity Checks */ + if (FSE_BUILD_DTABLE_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(maxSymbolValue_tooLarge); + if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge); + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); + + /* Init, lay down lowprob symbols */ + { FSE_DTableHeader DTableH; + DTableH.tableLog = (U16)tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + symbolNext[s] = normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { U32 u; + for (u=0; utableLog = 0; + DTableH->fastMode = 0; + + cell->newState = 0; + cell->symbol = symbolValue; + cell->nbBits = 0; + + return 0; +} + + +size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits) +{ + void* ptr = dt; + FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr; + void* dPtr = dt + 1; + FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr; + const unsigned tableSize = 1 << nbBits; + const unsigned tableMask = tableSize - 1; + const unsigned maxSV1 = tableMask+1; + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* Build Decoding Table */ + DTableH->tableLog = (U16)nbBits; + DTableH->fastMode = 1; + for (s=0; s sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[1] = FSE_GETSYMBOL(&state2); + + if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } } + + op[2] = FSE_GETSYMBOL(&state1); + + if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */ + BIT_reloadDStream(&bitD); + + op[3] = FSE_GETSYMBOL(&state2); + } + + /* tail */ + /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ + while (1) { + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state1); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state2); + break; + } + + if (op>(omax-2)) return ERROR(dstSize_tooSmall); + *op++ = FSE_GETSYMBOL(&state2); + if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) { + *op++ = FSE_GETSYMBOL(&state1); + break; + } } + + return op-ostart; +} + + +size_t FSE_decompress_usingDTable(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize, + const FSE_DTable* dt) +{ + const void* ptr = dt; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); + return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); +} + + +size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, /* bmi2 */ 0); +} + +typedef struct { + short ncount[FSE_MAX_SYMBOL_VALUE + 1]; + FSE_DTable dtable[1]; /* Dynamically sized */ +} FSE_DecompressWksp; + + +FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body( + void* dst, size_t dstCapacity, + const void* cSrc, size_t cSrcSize, + unsigned maxLog, void* workSpace, size_t wkspSize, + int bmi2) +{ + const BYTE* const istart = (const BYTE*)cSrc; + const BYTE* ip = istart; + unsigned tableLog; + unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; + FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace; + + DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0); + if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC); + + /* normal FSE decoding mode */ + { + size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2); + if (FSE_isError(NCountLength)) return NCountLength; + if (tableLog > maxLog) return ERROR(tableLog_tooLarge); + assert(NCountLength <= cSrcSize); + ip += NCountLength; + cSrcSize -= NCountLength; + } + + if (FSE_DECOMPRESS_WKSP_SIZE(tableLog, maxSymbolValue) > wkspSize) return ERROR(tableLog_tooLarge); + workSpace = wksp->dtable + FSE_DTABLE_SIZE_U32(tableLog); + wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog); + + CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) ); + + { + const void* ptr = wksp->dtable; + const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr; + const U32 fastMode = DTableH->fastMode; + + /* select fast mode (static) */ + if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1); + return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0); + } +} + +/* Avoids the FORCE_INLINE of the _body() function. */ +static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 0); +} + +#if DYNAMIC_BMI2 +TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize) +{ + return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1); +} +#endif + +size_t FSE_decompress_wksp_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize, int bmi2) +{ +#if DYNAMIC_BMI2 + if (bmi2) { + return FSE_decompress_wksp_body_bmi2(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); + } +#endif + (void)bmi2; + return FSE_decompress_wksp_body_default(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize); +} + + +typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; + + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h new file mode 100644 index 000000000000..49939016c623 --- /dev/null +++ b/lib/zstd/common/huf.h @@ -0,0 +1,356 @@ +/* ****************************************************************** + * huff0 huffman codec, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + + +#ifndef HUF_H_298734234 +#define HUF_H_298734234 + +/* *** Dependencies *** */ +#include "zstd_deps.h" /* size_t */ + + +/* *** library symbols visibility *** */ +/* Note : when linking with -fvisibility=hidden on gcc, or by default on Visual, + * HUF symbols remain "private" (internal symbols for library only). + * Set macro FSE_DLL_EXPORT to 1 if you want HUF symbols visible on DLL interface */ +#if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4) +# define HUF_PUBLIC_API __attribute__ ((visibility ("default"))) +#elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */ +# define HUF_PUBLIC_API __declspec(dllexport) +#elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1) +# define HUF_PUBLIC_API __declspec(dllimport) /* not required, just to generate faster code (saves a function pointer load from IAT and an indirect jump) */ +#else +# define HUF_PUBLIC_API +#endif + + +/* ========================== */ +/* *** simple functions *** */ +/* ========================== */ + +/** HUF_compress() : + * Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'. + * 'dst' buffer must be already allocated. + * Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize). + * `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB. + * @return : size of compressed data (<= `dstCapacity`). + * Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!! + * if HUF_isError(return), compression failed (more details using HUF_getErrorName()) + */ +HUF_PUBLIC_API size_t HUF_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize); + +/** HUF_decompress() : + * Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', + * into already allocated buffer 'dst', of minimum size 'dstSize'. + * `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. + * Note : in contrast with FSE, HUF_decompress can regenerate + * RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, + * because it knows size to regenerate (originalSize). + * @return : size of regenerated data (== originalSize), + * or an error code, which can be tested using HUF_isError() + */ +HUF_PUBLIC_API size_t HUF_decompress(void* dst, size_t originalSize, + const void* cSrc, size_t cSrcSize); + + +/* *** Tool functions *** */ +#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ +HUF_PUBLIC_API size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ + +/* Error Management */ +HUF_PUBLIC_API unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ +HUF_PUBLIC_API const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ + + +/* *** Advanced function *** */ + +/** HUF_compress2() : + * Same as HUF_compress(), but offers control over `maxSymbolValue` and `tableLog`. + * `maxSymbolValue` must be <= HUF_SYMBOLVALUE_MAX . + * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ +HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog); + +/** HUF_compress4X_wksp() : + * Same as HUF_compress2(), but uses externally allocated `workSpace`. + * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */ +#define HUF_WORKSPACE_SIZE ((6 << 10) + 256) +#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32)) +HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize); + +#endif /* HUF_H_298734234 */ + +/* ****************************************************************** + * WARNING !! + * The following section contains advanced and experimental definitions + * which shall never be used in the context of a dynamic library, + * because they are not guaranteed to remain stable in the future. + * Only consider them in association with static linking. + * *****************************************************************/ +#if !defined(HUF_H_HUF_STATIC_LINKING_ONLY) +#define HUF_H_HUF_STATIC_LINKING_ONLY + +/* *** Dependencies *** */ +#include "mem.h" /* U32 */ +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" + + +/* *** Constants *** */ +#define HUF_TABLELOG_MAX 12 /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ +#define HUF_TABLELOG_DEFAULT 11 /* default tableLog value when none specified */ +#define HUF_SYMBOLVALUE_MAX 255 + +#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ +#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) +# error "HUF_TABLELOG_MAX is too large !" +#endif + + +/* **************************************** +* Static allocation +******************************************/ +/* HUF buffer bounds */ +#define HUF_CTABLEBOUND 129 +#define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true when incompressible is pre-filtered with fast heuristic */ +#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ + +/* static allocation of HUF's Compression Table */ +/* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */ +struct HUF_CElt_s { + U16 val; + BYTE nbBits; +}; /* typedef'd to HUF_CElt */ +typedef struct HUF_CElt_s HUF_CElt; /* consider it an incomplete type */ +#define HUF_CTABLE_SIZE_U32(maxSymbolValue) ((maxSymbolValue)+1) /* Use tables of U32, for proper alignment */ +#define HUF_CTABLE_SIZE(maxSymbolValue) (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32)) +#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ + HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */ + +/* static allocation of HUF's DTable */ +typedef U32 HUF_DTable; +#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) +#define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } +#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ + HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } + + +/* **************************************** +* Advanced decompression functions +******************************************/ +size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */ +size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */ +size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + + +/* **************************************** + * HUF detailed API + * ****************************************/ + +/*! HUF_compress() does the following: + * 1. count symbol occurrence from source[] into table count[] using FSE_count() (exposed within "fse.h") + * 2. (optional) refine tableLog using HUF_optimalTableLog() + * 3. build Huffman table from count using HUF_buildCTable() + * 4. save Huffman table to memory buffer using HUF_writeCTable() + * 5. encode the data stream using HUF_compress4X_usingCTable() + * + * The following API allows targeting specific sub-functions for advanced tasks. + * For example, it's possible to compress several blocks using the same 'CTable', + * or to save and regenerate 'CTable' using external methods. + */ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); +size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits); /* @return : maxNbBits; CTable and count can overlap. In which case, CTable will overwrite count content */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog); +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize); +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue); + +typedef enum { + HUF_repeat_none, /**< Cannot use the previous table */ + HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */ + HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */ + } HUF_repeat; +/** HUF_compress4X_repeat() : + * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress4X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and its size must be >= HUF_CTABLE_WORKSPACE_SIZE. + */ +#define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1) +#define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_buildCTable_wksp (HUF_CElt* tree, + const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, + void* workSpace, size_t wkspSize); + +/*! HUF_readStats() : + * Read compact Huffman tree, saved by HUF_writeCTable(). + * `huffWeight` is destination buffer. + * @return : size read from `src` , or an error Code . + * Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ +size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize); + +/*! HUF_readStats_wksp() : + * Same as HUF_readStats() but takes an external workspace which must be + * 4-byte aligned and its size must be >= HUF_READ_STATS_WORKSPACE_SIZE. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +#define HUF_READ_STATS_WORKSPACE_SIZE_U32 FSE_DECOMPRESS_WKSP_SIZE_U32(6, HUF_TABLELOG_MAX-1) +#define HUF_READ_STATS_WORKSPACE_SIZE (HUF_READ_STATS_WORKSPACE_SIZE_U32 * sizeof(unsigned)) +size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, + U32* rankStats, U32* nbSymbolsPtr, U32* tableLogPtr, + const void* src, size_t srcSize, + void* workspace, size_t wkspSize, + int bmi2); + +/** HUF_readCTable() : + * Loading a CTable saved with HUF_writeCTable() */ +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights); + +/** HUF_getNbBits() : + * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX + * Note 1 : is not inlined, as HUF_CElt definition is private + * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */ +U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue); + +/* + * HUF_decompress() does the following: + * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics + * 2. build Huffman table from save, using HUF_readDTableX?() + * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable() + */ + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize); + +/** + * The minimum workspace size for the `workSpace` used in + * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp(). + * + * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when + * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15. + * Buffer overflow errors may potentially occur if code modifications result in + * a required workspace size greater than that specified in the following + * macro. + */ +#define HUF_DECOMPRESS_WORKSPACE_SIZE ((2 << 10) + (1 << 9)) +#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize); +size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize); +#endif + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + + +/* ====================== */ +/* single stream variants */ +/* ====================== */ + +size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */ +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); +/** HUF_compress1X_repeat() : + * Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. + * If it uses hufTable it does not modify hufTable or repeat. + * If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. + * If preferRepeat then the old table will always be used if valid. */ +size_t HUF_compress1X_repeat(void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned tableLog, + void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */ + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2); + +size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ +#endif + +size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); +size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */ +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */ +#endif + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif +#ifndef HUF_FORCE_DECOMPRESS_X1 +size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); +#endif + +/* BMI2 variants. + * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0. + */ +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2); +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2); +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2); +#endif + +#endif /* HUF_STATIC_LINKING_ONLY */ + diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h new file mode 100644 index 000000000000..4b5db5756a6f --- /dev/null +++ b/lib/zstd/common/mem.h @@ -0,0 +1,259 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef MEM_H_MODULE +#define MEM_H_MODULE + +/*-**************************************** +* Dependencies +******************************************/ +#include /* get_unaligned, put_unaligned* */ +#include /* inline */ +#include /* swab32, swab64 */ +#include /* size_t, ptrdiff_t */ +#include "debug.h" /* DEBUG_STATIC_ASSERT */ + +/*-**************************************** +* Compiler specifics +******************************************/ +#define MEM_STATIC static inline + +/*-************************************************************** +* Basic Types +*****************************************************************/ +typedef uint8_t BYTE; +typedef uint16_t U16; +typedef int16_t S16; +typedef uint32_t U32; +typedef int32_t S32; +typedef uint64_t U64; +typedef int64_t S64; + +/*-************************************************************** +* Memory I/O API +*****************************************************************/ +/*=== Static platform detection ===*/ +MEM_STATIC unsigned MEM_32bits(void); +MEM_STATIC unsigned MEM_64bits(void); +MEM_STATIC unsigned MEM_isLittleEndian(void); + +/*=== Native unaligned read/write ===*/ +MEM_STATIC U16 MEM_read16(const void* memPtr); +MEM_STATIC U32 MEM_read32(const void* memPtr); +MEM_STATIC U64 MEM_read64(const void* memPtr); +MEM_STATIC size_t MEM_readST(const void* memPtr); + +MEM_STATIC void MEM_write16(void* memPtr, U16 value); +MEM_STATIC void MEM_write32(void* memPtr, U32 value); +MEM_STATIC void MEM_write64(void* memPtr, U64 value); + +/*=== Little endian unaligned read/write ===*/ +MEM_STATIC U16 MEM_readLE16(const void* memPtr); +MEM_STATIC U32 MEM_readLE24(const void* memPtr); +MEM_STATIC U32 MEM_readLE32(const void* memPtr); +MEM_STATIC U64 MEM_readLE64(const void* memPtr); +MEM_STATIC size_t MEM_readLEST(const void* memPtr); + +MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val); +MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val); +MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val); + +/*=== Big endian unaligned read/write ===*/ +MEM_STATIC U32 MEM_readBE32(const void* memPtr); +MEM_STATIC U64 MEM_readBE64(const void* memPtr); +MEM_STATIC size_t MEM_readBEST(const void* memPtr); + +MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32); +MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64); +MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val); + +/*=== Byteswap ===*/ +MEM_STATIC U32 MEM_swap32(U32 in); +MEM_STATIC U64 MEM_swap64(U64 in); +MEM_STATIC size_t MEM_swapST(size_t in); + +/*-************************************************************** +* Memory I/O Implementation +*****************************************************************/ +MEM_STATIC unsigned MEM_32bits(void) +{ + return sizeof(size_t) == 4; +} + +MEM_STATIC unsigned MEM_64bits(void) +{ + return sizeof(size_t) == 8; +} + +#if defined(__LITTLE_ENDIAN) +#define MEM_LITTLE_ENDIAN 1 +#else +#define MEM_LITTLE_ENDIAN 0 +#endif + +MEM_STATIC unsigned MEM_isLittleEndian(void) +{ + return MEM_LITTLE_ENDIAN; +} + +MEM_STATIC U16 MEM_read16(const void *memPtr) +{ + return get_unaligned((const U16 *)memPtr); +} + +MEM_STATIC U32 MEM_read32(const void *memPtr) +{ + return get_unaligned((const U32 *)memPtr); +} + +MEM_STATIC U64 MEM_read64(const void *memPtr) +{ + return get_unaligned((const U64 *)memPtr); +} + +MEM_STATIC size_t MEM_readST(const void *memPtr) +{ + return get_unaligned((const size_t *)memPtr); +} + +MEM_STATIC void MEM_write16(void *memPtr, U16 value) +{ + put_unaligned(value, (U16 *)memPtr); +} + +MEM_STATIC void MEM_write32(void *memPtr, U32 value) +{ + put_unaligned(value, (U32 *)memPtr); +} + +MEM_STATIC void MEM_write64(void *memPtr, U64 value) +{ + put_unaligned(value, (U64 *)memPtr); +} + +/*=== Little endian r/w ===*/ + +MEM_STATIC U16 MEM_readLE16(const void *memPtr) +{ + return get_unaligned_le16(memPtr); +} + +MEM_STATIC void MEM_writeLE16(void *memPtr, U16 val) +{ + put_unaligned_le16(val, memPtr); +} + +MEM_STATIC U32 MEM_readLE24(const void *memPtr) +{ + return MEM_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); +} + +MEM_STATIC void MEM_writeLE24(void *memPtr, U32 val) +{ + MEM_writeLE16(memPtr, (U16)val); + ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); +} + +MEM_STATIC U32 MEM_readLE32(const void *memPtr) +{ + return get_unaligned_le32(memPtr); +} + +MEM_STATIC void MEM_writeLE32(void *memPtr, U32 val32) +{ + put_unaligned_le32(val32, memPtr); +} + +MEM_STATIC U64 MEM_readLE64(const void *memPtr) +{ + return get_unaligned_le64(memPtr); +} + +MEM_STATIC void MEM_writeLE64(void *memPtr, U64 val64) +{ + put_unaligned_le64(val64, memPtr); +} + +MEM_STATIC size_t MEM_readLEST(const void *memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readLE32(memPtr); + else + return (size_t)MEM_readLE64(memPtr); +} + +MEM_STATIC void MEM_writeLEST(void *memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeLE32(memPtr, (U32)val); + else + MEM_writeLE64(memPtr, (U64)val); +} + +/*=== Big endian r/w ===*/ + +MEM_STATIC U32 MEM_readBE32(const void *memPtr) +{ + return get_unaligned_be32(memPtr); +} + +MEM_STATIC void MEM_writeBE32(void *memPtr, U32 val32) +{ + put_unaligned_be32(val32, memPtr); +} + +MEM_STATIC U64 MEM_readBE64(const void *memPtr) +{ + return get_unaligned_be64(memPtr); +} + +MEM_STATIC void MEM_writeBE64(void *memPtr, U64 val64) +{ + put_unaligned_be64(val64, memPtr); +} + +MEM_STATIC size_t MEM_readBEST(const void *memPtr) +{ + if (MEM_32bits()) + return (size_t)MEM_readBE32(memPtr); + else + return (size_t)MEM_readBE64(memPtr); +} + +MEM_STATIC void MEM_writeBEST(void *memPtr, size_t val) +{ + if (MEM_32bits()) + MEM_writeBE32(memPtr, (U32)val); + else + MEM_writeBE64(memPtr, (U64)val); +} + +MEM_STATIC U32 MEM_swap32(U32 in) +{ + return swab32(in); +} + +MEM_STATIC U64 MEM_swap64(U64 in) +{ + return swab64(in); +} + +MEM_STATIC size_t MEM_swapST(size_t in) +{ + if (MEM_32bits()) + return (size_t)MEM_swap32((U32)in); + else + return (size_t)MEM_swap64((U64)in); +} + +#endif /* MEM_H_MODULE */ diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c new file mode 100644 index 000000000000..3d7e35b309b5 --- /dev/null +++ b/lib/zstd/common/zstd_common.c @@ -0,0 +1,83 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + + +/*-************************************* +* Dependencies +***************************************/ +#define ZSTD_DEPS_NEED_MALLOC +#include "zstd_deps.h" /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */ +#include "error_private.h" +#include "zstd_internal.h" + + +/*-**************************************** +* Version +******************************************/ +unsigned ZSTD_versionNumber(void) { return ZSTD_VERSION_NUMBER; } + +const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; } + + +/*-**************************************** +* ZSTD Error Management +******************************************/ +#undef ZSTD_isError /* defined within zstd_internal.h */ +/*! ZSTD_isError() : + * tells if a return value is an error code + * symbol is required for external callers */ +unsigned ZSTD_isError(size_t code) { return ERR_isError(code); } + +/*! ZSTD_getErrorName() : + * provides error code string from function result (useful for debugging) */ +const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); } + +/*! ZSTD_getError() : + * convert a `size_t` function result into a proper ZSTD_errorCode enum */ +ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); } + +/*! ZSTD_getErrorString() : + * provides error code string from enum */ +const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); } + + + +/*=************************************************************** +* Custom allocator +****************************************************************/ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) + return customMem.customAlloc(customMem.opaque, size); + return ZSTD_malloc(size); +} + +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem) +{ + if (customMem.customAlloc) { + /* calloc implemented as malloc+memset; + * not as efficient as calloc, but next best guess for custom malloc */ + void* const ptr = customMem.customAlloc(customMem.opaque, size); + ZSTD_memset(ptr, 0, size); + return ptr; + } + return ZSTD_calloc(1, size); +} + +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem) +{ + if (ptr!=NULL) { + if (customMem.customFree) + customMem.customFree(customMem.opaque, ptr); + else + ZSTD_free(ptr); + } +} diff --git a/lib/zstd/common/zstd_deps.h b/lib/zstd/common/zstd_deps.h new file mode 100644 index 000000000000..853b72426215 --- /dev/null +++ b/lib/zstd/common/zstd_deps.h @@ -0,0 +1,125 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This file provides common libc dependencies that zstd requires. + * The purpose is to allow replacing this file with a custom implementation + * to compile zstd without libc support. + */ + +/* Need: + * NULL + * INT_MAX + * UINT_MAX + * ZSTD_memcpy() + * ZSTD_memset() + * ZSTD_memmove() + */ +#ifndef ZSTD_DEPS_COMMON +#define ZSTD_DEPS_COMMON + +#include +#include + +#define ZSTD_memcpy(d,s,n) __builtin_memcpy((d),(s),(n)) +#define ZSTD_memmove(d,s,n) __builtin_memmove((d),(s),(n)) +#define ZSTD_memset(d,s,n) __builtin_memset((d),(s),(n)) + +#endif /* ZSTD_DEPS_COMMON */ + +/* + * Define malloc as always failing. That means the user must + * either use ZSTD_customMem or statically allocate memory. + * Need: + * ZSTD_malloc() + * ZSTD_free() + * ZSTD_calloc() + */ +#ifdef ZSTD_DEPS_NEED_MALLOC +#ifndef ZSTD_DEPS_MALLOC +#define ZSTD_DEPS_MALLOC + +#define ZSTD_malloc(s) ({ (void)(s); NULL; }) +#define ZSTD_free(p) ((void)(p)) +#define ZSTD_calloc(n,s) ({ (void)(n); (void)(s); NULL; }) + +#endif /* ZSTD_DEPS_MALLOC */ +#endif /* ZSTD_DEPS_NEED_MALLOC */ + +/* + * Provides 64-bit math support. + * Need: + * U64 ZSTD_div64(U64 dividend, U32 divisor) + */ +#ifdef ZSTD_DEPS_NEED_MATH64 +#ifndef ZSTD_DEPS_MATH64 +#define ZSTD_DEPS_MATH64 + +#include + +static uint64_t ZSTD_div64(uint64_t dividend, uint32_t divisor) { + return div_u64(dividend, divisor); +} + +#endif /* ZSTD_DEPS_MATH64 */ +#endif /* ZSTD_DEPS_NEED_MATH64 */ + +/* + * This is only requested when DEBUGLEVEL >= 1, meaning + * it is disabled in production. + * Need: + * assert() + */ +#ifdef ZSTD_DEPS_NEED_ASSERT +#ifndef ZSTD_DEPS_ASSERT +#define ZSTD_DEPS_ASSERT + +#include + +#define assert(x) WARN_ON((x)) + +#endif /* ZSTD_DEPS_ASSERT */ +#endif /* ZSTD_DEPS_NEED_ASSERT */ + +/* + * This is only requested when DEBUGLEVEL >= 2, meaning + * it is disabled in production. + * Need: + * ZSTD_DEBUG_PRINT() + */ +#ifdef ZSTD_DEPS_NEED_IO +#ifndef ZSTD_DEPS_IO +#define ZSTD_DEPS_IO + +#include + +#define ZSTD_DEBUG_PRINT(...) pr_debug(__VA_ARGS__) + +#endif /* ZSTD_DEPS_IO */ +#endif /* ZSTD_DEPS_NEED_IO */ + +/* + * Only requested when MSAN is enabled. + * Need: + * intptr_t + */ +#ifdef ZSTD_DEPS_NEED_STDINT +#ifndef ZSTD_DEPS_STDINT +#define ZSTD_DEPS_STDINT + +/* + * The Linux Kernel doesn't provide intptr_t, only uintptr_t, which + * is an unsigned long. + */ +typedef long intptr_t; + +#endif /* ZSTD_DEPS_STDINT */ +#endif /* ZSTD_DEPS_NEED_STDINT */ diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h new file mode 100644 index 000000000000..1f939cbe05ed --- /dev/null +++ b/lib/zstd/common/zstd_internal.h @@ -0,0 +1,450 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CCOMMON_H_MODULE +#define ZSTD_CCOMMON_H_MODULE + +/* this module contains definitions which must be identical + * across compression, decompression and dictBuilder. + * It also contains a few functions useful to at least 2 of them + * and which benefit from being inlined */ + +/*-************************************* +* Dependencies +***************************************/ +#include "compiler.h" +#include "mem.h" +#include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */ +#include "error_private.h" +#define ZSTD_STATIC_LINKING_ONLY +#include +#define FSE_STATIC_LINKING_ONLY +#include "fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "huf.h" +#include /* XXH_reset, update, digest */ +#define ZSTD_TRACE 0 + + +/* ---- static assert (debug) --- */ +#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) +#define ZSTD_isError ERR_isError /* for inlining */ +#define FSE_isError ERR_isError +#define HUF_isError ERR_isError + + +/*-************************************* +* shared macros +***************************************/ +#undef MIN +#undef MAX +#define MIN(a,b) ((a)<(b) ? (a) : (b)) +#define MAX(a,b) ((a)>(b) ? (a) : (b)) + +/** + * Ignore: this is an internal helper. + * + * This is a helper function to help force C99-correctness during compilation. + * Under strict compilation modes, variadic macro arguments can't be empty. + * However, variadic function arguments can be. Using a function therefore lets + * us statically check that at least one (string) argument was passed, + * independent of the compilation flags. + */ +static INLINE_KEYWORD UNUSED_ATTR +void _force_has_format_string(const char *format, ...) { + (void)format; +} + +/** + * Ignore: this is an internal helper. + * + * We want to force this function invocation to be syntactically correct, but + * we don't want to force runtime evaluation of its arguments. + */ +#define _FORCE_HAS_FORMAT_STRING(...) \ + if (0) { \ + _force_has_format_string(__VA_ARGS__); \ + } + +/** + * Return the specified error if the condition evaluates to true. + * + * In debug modes, prints additional information. + * In order to do that (particularly, printing the conditional that failed), + * this can't just wrap RETURN_ERROR(). + */ +#define RETURN_ERROR_IF(cond, err, ...) \ + if (cond) { \ + RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } + +/** + * Unconditionally return the specified error. + * + * In debug modes, prints additional information. + */ +#define RETURN_ERROR(err, ...) \ + do { \ + RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return ERROR(err); \ + } while(0); + +/** + * If the provided expression evaluates to an error code, returns that error code. + * + * In debug modes, prints additional information. + */ +#define FORWARD_IF_ERROR(err, ...) \ + do { \ + size_t const err_code = (err); \ + if (ERR_isError(err_code)) { \ + RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \ + __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ + _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \ + RAWLOG(3, ": " __VA_ARGS__); \ + RAWLOG(3, "\n"); \ + return err_code; \ + } \ + } while(0); + + +/*-************************************* +* Common constants +***************************************/ +#define ZSTD_OPT_NUM (1<<12) + +#define ZSTD_REP_NUM 3 /* number of repcodes */ +#define ZSTD_REP_MOVE (ZSTD_REP_NUM-1) +static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 }; + +#define KB *(1 <<10) +#define MB *(1 <<20) +#define GB *(1U<<30) + +#define BIT7 128 +#define BIT6 64 +#define BIT5 32 +#define BIT4 16 +#define BIT1 2 +#define BIT0 1 + +#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 +static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 }; +static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 }; + +#define ZSTD_FRAMEIDSIZE 4 /* magic number size */ + +#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ +static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; +typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; + +#define ZSTD_FRAMECHECKSUMSIZE 4 + +#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ +#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ + +#define HufLog 12 +typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; + +#define LONGNBSEQ 0x7F00 + +#define MINMATCH 3 + +#define Litbits 8 +#define MaxLit ((1<= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN)); + + if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) { + /* Handle short offset copies. */ + do { + COPY8(op, ip) + } while (op < oend); + } else { + assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN); + /* Separate out the first COPY16() call because the copy length is + * almost certain to be short, so the branches have different + * probabilities. Since it is almost certain to be short, only do + * one COPY16() in the first call. Then, do two calls per loop since + * at that point it is more likely to have a high trip count. + */ +#ifdef __aarch64__ + do { + COPY16(op, ip); + } + while (op < oend); +#else + ZSTD_copy16(op, ip); + if (16 >= length) return; + op += 16; + ip += 16; + do { + COPY16(op, ip); + COPY16(op, ip); + } + while (op < oend); +#endif + } +} + +MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + size_t const length = MIN(dstCapacity, srcSize); + if (length > 0) { + ZSTD_memcpy(dst, src, length); + } + return length; +} + +/* define "workspace is too large" as this number of times larger than needed */ +#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 + +/* when workspace is continuously too large + * during at least this number of times, + * context's memory usage is considered wasteful, + * because it's sized to handle a worst case scenario which rarely happens. + * In which case, resize it down to free some memory */ +#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 + +/* Controls whether the input/output buffer is buffered or stable. */ +typedef enum { + ZSTD_bm_buffered = 0, /* Buffer the input/output */ + ZSTD_bm_stable = 1 /* ZSTD_inBuffer/ZSTD_outBuffer is stable */ +} ZSTD_bufferMode_e; + + +/*-******************************************* +* Private declarations +*********************************************/ +typedef struct seqDef_s { + U32 offset; /* Offset code of the sequence */ + U16 litLength; + U16 matchLength; +} seqDef; + +typedef struct { + seqDef* sequencesStart; + seqDef* sequences; /* ptr to end of sequences */ + BYTE* litStart; + BYTE* lit; /* ptr to end of literals */ + BYTE* llCode; + BYTE* mlCode; + BYTE* ofCode; + size_t maxNbSeq; + size_t maxNbLit; + + /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength + * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment + * the existing value of the litLength or matchLength by 0x10000. + */ + U32 longLengthID; /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */ + U32 longLengthPos; /* Index of the sequence to apply long length modification to */ +} seqStore_t; + +typedef struct { + U32 litLength; + U32 matchLength; +} ZSTD_sequenceLength; + +/** + * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences + * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength. + */ +MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq) +{ + ZSTD_sequenceLength seqLen; + seqLen.litLength = seq->litLength; + seqLen.matchLength = seq->matchLength + MINMATCH; + if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) { + if (seqStore->longLengthID == 1) { + seqLen.litLength += 0xFFFF; + } + if (seqStore->longLengthID == 2) { + seqLen.matchLength += 0xFFFF; + } + } + return seqLen; +} + +/** + * Contains the compressed frame size and an upper-bound for the decompressed frame size. + * Note: before using `compressedSize`, check for errors using ZSTD_isError(). + * similarly, before using `decompressedBound`, check for errors using: + * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` + */ +typedef struct { + size_t compressedSize; + unsigned long long decompressedBound; +} ZSTD_frameSizeInfo; /* decompress & legacy */ + +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ + +/* custom memory allocation functions */ +void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem); +void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem); +void ZSTD_customFree(void* ptr, ZSTD_customMem customMem); + + +MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */ +{ + assert(val != 0); + { +# if (__GNUC__ >= 3) /* GCC Intrinsic */ + return __builtin_clz (val) ^ 31; +# else /* Software version */ + static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; + U32 v = val; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return DeBruijnClz[(v * 0x07C4ACDDU) >> 27]; +# endif + } +} + + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */ + + +typedef struct { + blockType_e blockType; + U32 lastBlock; + U32 origSize; +} blockProperties_t; /* declared here for decompress and fullbench */ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr); + +/*! ZSTD_decodeSeqHeaders() : + * decode sequence header from src */ +/* Used by: decompress, fullbench (does not get its definition from here) */ +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize); + + + +#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/compress.c b/lib/zstd/compress.c deleted file mode 100644 index b080264ed3ad..000000000000 --- a/lib/zstd/compress.c +++ /dev/null @@ -1,3485 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/*-************************************* -* Dependencies -***************************************/ -#include "fse.h" -#include "huf.h" -#include "mem.h" -#include "zstd_internal.h" /* includes zstd.h */ -#include -#include -#include /* memset */ - -/*-************************************* -* Constants -***************************************/ -static const U32 g_searchStrength = 8; /* control skip over incompressible data */ -#define HASH_READ_SIZE 8 -typedef enum { ZSTDcs_created = 0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; - -/*-************************************* -* Helper functions -***************************************/ -size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } - -/*-************************************* -* Sequence storage -***************************************/ -static void ZSTD_resetSeqStore(seqStore_t *ssPtr) -{ - ssPtr->lit = ssPtr->litStart; - ssPtr->sequences = ssPtr->sequencesStart; - ssPtr->longLengthID = 0; -} - -/*-************************************* -* Context memory management -***************************************/ -struct ZSTD_CCtx_s { - const BYTE *nextSrc; /* next block here to continue on curr prefix */ - const BYTE *base; /* All regular indexes relative to this position */ - const BYTE *dictBase; /* extDict indexes relative to this position */ - U32 dictLimit; /* below that point, need extDict */ - U32 lowLimit; /* below that point, no more data */ - U32 nextToUpdate; /* index from which to continue dictionary update */ - U32 nextToUpdate3; /* index from which to continue dictionary update */ - U32 hashLog3; /* dispatch table : larger == faster, more memory */ - U32 loadedDictEnd; /* index of end of dictionary */ - U32 forceWindow; /* force back-references to respect limit of 1< 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - size_t const optSpace = - ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); - size_t const workspaceSize = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace + - (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0); - - return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_CCtx)) + ZSTD_ALIGN(workspaceSize); -} - -static ZSTD_CCtx *ZSTD_createCCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_CCtx *cctx; - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - cctx = (ZSTD_CCtx *)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem); - if (!cctx) - return NULL; - memset(cctx, 0, sizeof(ZSTD_CCtx)); - cctx->customMem = customMem; - return cctx; -} - -ZSTD_CCtx *ZSTD_initCCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_CCtx *cctx = ZSTD_createCCtx_advanced(stackMem); - if (cctx) { - cctx->workSpace = ZSTD_stackAllocAll(cctx->customMem.opaque, &cctx->workSpaceSize); - } - return cctx; -} - -size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx) -{ - if (cctx == NULL) - return 0; /* support free on NULL */ - ZSTD_free(cctx->workSpace, cctx->customMem); - ZSTD_free(cctx, cctx->customMem); - return 0; /* reserved as a potential error code in the future */ -} - -const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx) /* hidden interface */ { return &(ctx->seqStore); } - -static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx *cctx) { return cctx->params; } - -/** ZSTD_checkParams() : - ensure param values remain within authorized range. - @return : 0, or an error code if one value is beyond authorized range */ -size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) -{ -#define CLAMPCHECK(val, min, max) \ - { \ - if ((val < min) | (val > max)) \ - return ERROR(compressionParameter_unsupported); \ - } - CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX); - CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX); - CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX); - CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX); - CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX); - CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX); - if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) - return ERROR(compressionParameter_unsupported); - return 0; -} - -/** ZSTD_cycleLog() : - * condition for correct operation : hashLog > 1 */ -static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) -{ - U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); - return hashLog - btScale; -} - -/** ZSTD_adjustCParams() : - optimize `cPar` for a given input (`srcSize` and `dictSize`). - mostly downsizing to reduce memory consumption and initialization. - Both `srcSize` and `dictSize` are optional (use 0 if unknown), - but if both are 0, no optimization can be done. - Note : cPar is considered validated at this stage. Use ZSTD_checkParams() to ensure that. */ -ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize) -{ - if (srcSize + dictSize == 0) - return cPar; /* no size information available : no adjustment */ - - /* resize params, to use less memory when necessary */ - { - U32 const minSrcSize = (srcSize == 0) ? 500 : 0; - U64 const rSize = srcSize + dictSize + minSrcSize; - if (rSize < ((U64)1 << ZSTD_WINDOWLOG_MAX)) { - U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1); - if (cPar.windowLog > srcLog) - cPar.windowLog = srcLog; - } - } - if (cPar.hashLog > cPar.windowLog) - cPar.hashLog = cPar.windowLog; - { - U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); - if (cycleLog > cPar.windowLog) - cPar.chainLog -= (cycleLog - cPar.windowLog); - } - - if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) - cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ - - return cPar; -} - -static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2) -{ - return (param1.cParams.hashLog == param2.cParams.hashLog) & (param1.cParams.chainLog == param2.cParams.chainLog) & - (param1.cParams.strategy == param2.cParams.strategy) & ((param1.cParams.searchLength == 3) == (param2.cParams.searchLength == 3)); -} - -/*! ZSTD_continueCCtx() : - reuse CCtx without reset (note : requires no dictionary) */ -static size_t ZSTD_continueCCtx(ZSTD_CCtx *cctx, ZSTD_parameters params, U64 frameContentSize) -{ - U32 const end = (U32)(cctx->nextSrc - cctx->base); - cctx->params = params; - cctx->frameContentSize = frameContentSize; - cctx->lowLimit = end; - cctx->dictLimit = end; - cctx->nextToUpdate = end + 1; - cctx->stage = ZSTDcs_init; - cctx->dictID = 0; - cctx->loadedDictEnd = 0; - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - cctx->rep[i] = repStartValue[i]; - } - cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */ - xxh64_reset(&cctx->xxhState, 0); - return 0; -} - -typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e; - -/*! ZSTD_resetCCtx_advanced() : - note : `params` must be validated */ -static size_t ZSTD_resetCCtx_advanced(ZSTD_CCtx *zc, ZSTD_parameters params, U64 frameContentSize, ZSTD_compResetPolicy_e const crp) -{ - if (crp == ZSTDcrp_continue) - if (ZSTD_equivalentParams(params, zc->params)) { - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_continueCCtx(zc, params, frameContentSize); - } - - { - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog); - U32 const divider = (params.cParams.searchLength == 3) ? 3 : 4; - size_t const maxNbSeq = blockSize / divider; - size_t const tokenSpace = blockSize + 11 * maxNbSeq; - size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog); - size_t const hSize = ((size_t)1) << params.cParams.hashLog; - U32 const hashLog3 = (params.cParams.searchLength > 3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog); - size_t const h3Size = ((size_t)1) << hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - void *ptr; - - /* Check if workSpace is large enough, alloc a new one if needed */ - { - size_t const optSpace = ((MaxML + 1) + (MaxLL + 1) + (MaxOff + 1) + (1 << Litbits)) * sizeof(U32) + - (ZSTD_OPT_NUM + 1) * (sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t)); - size_t const neededSpace = tableSpace + (256 * sizeof(U32)) /* huffTable */ + tokenSpace + - (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0); - if (zc->workSpaceSize < neededSpace) { - ZSTD_free(zc->workSpace, zc->customMem); - zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); - if (zc->workSpace == NULL) - return ERROR(memory_allocation); - zc->workSpaceSize = neededSpace; - } - } - - if (crp != ZSTDcrp_noMemset) - memset(zc->workSpace, 0, tableSpace); /* reset tables only */ - xxh64_reset(&zc->xxhState, 0); - zc->hashLog3 = hashLog3; - zc->hashTable = (U32 *)(zc->workSpace); - zc->chainTable = zc->hashTable + hSize; - zc->hashTable3 = zc->chainTable + chainSize; - ptr = zc->hashTable3 + h3Size; - zc->hufTable = (HUF_CElt *)ptr; - zc->flagStaticTables = 0; - zc->flagStaticHufTable = HUF_repeat_none; - ptr = ((U32 *)ptr) + 256; /* note : HUF_CElt* is incomplete type, size is simulated using U32 */ - - zc->nextToUpdate = 1; - zc->nextSrc = NULL; - zc->base = NULL; - zc->dictBase = NULL; - zc->dictLimit = 0; - zc->lowLimit = 0; - zc->params = params; - zc->blockSize = blockSize; - zc->frameContentSize = frameContentSize; - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - zc->rep[i] = repStartValue[i]; - } - - if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) { - zc->seqStore.litFreq = (U32 *)ptr; - zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1 << Litbits); - zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL + 1); - zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML + 1); - ptr = zc->seqStore.offCodeFreq + (MaxOff + 1); - zc->seqStore.matchTable = (ZSTD_match_t *)ptr; - ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM + 1; - zc->seqStore.priceTable = (ZSTD_optimal_t *)ptr; - ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM + 1; - zc->seqStore.litLengthSum = 0; - } - zc->seqStore.sequencesStart = (seqDef *)ptr; - ptr = zc->seqStore.sequencesStart + maxNbSeq; - zc->seqStore.llCode = (BYTE *)ptr; - zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq; - zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq; - zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq; - - zc->stage = ZSTDcs_init; - zc->dictID = 0; - zc->loadedDictEnd = 0; - - return 0; - } -} - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx) -{ - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - cctx->rep[i] = 0; -} - -/*! ZSTD_copyCCtx() : -* Duplicate an existing context `srcCCtx` into another one `dstCCtx`. -* Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). -* @return : 0, or an error code */ -size_t ZSTD_copyCCtx(ZSTD_CCtx *dstCCtx, const ZSTD_CCtx *srcCCtx, unsigned long long pledgedSrcSize) -{ - if (srcCCtx->stage != ZSTDcs_init) - return ERROR(stage_wrong); - - memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); - { - ZSTD_parameters params = srcCCtx->params; - params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - ZSTD_resetCCtx_advanced(dstCCtx, params, pledgedSrcSize, ZSTDcrp_noMemset); - } - - /* copy tables */ - { - size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog); - size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog; - size_t const h3Size = (size_t)1 << srcCCtx->hashLog3; - size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); - memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace); - } - - /* copy dictionary offsets */ - dstCCtx->nextToUpdate = srcCCtx->nextToUpdate; - dstCCtx->nextToUpdate3 = srcCCtx->nextToUpdate3; - dstCCtx->nextSrc = srcCCtx->nextSrc; - dstCCtx->base = srcCCtx->base; - dstCCtx->dictBase = srcCCtx->dictBase; - dstCCtx->dictLimit = srcCCtx->dictLimit; - dstCCtx->lowLimit = srcCCtx->lowLimit; - dstCCtx->loadedDictEnd = srcCCtx->loadedDictEnd; - dstCCtx->dictID = srcCCtx->dictID; - - /* copy entropy tables */ - dstCCtx->flagStaticTables = srcCCtx->flagStaticTables; - dstCCtx->flagStaticHufTable = srcCCtx->flagStaticHufTable; - if (srcCCtx->flagStaticTables) { - memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable)); - memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable)); - memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable)); - } - if (srcCCtx->flagStaticHufTable) { - memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256 * 4); - } - - return 0; -} - -/*! ZSTD_reduceTable() : -* reduce table indexes by `reducerValue` */ -static void ZSTD_reduceTable(U32 *const table, U32 const size, U32 const reducerValue) -{ - U32 u; - for (u = 0; u < size; u++) { - if (table[u] < reducerValue) - table[u] = 0; - else - table[u] -= reducerValue; - } -} - -/*! ZSTD_reduceIndex() : -* rescale all indexes to avoid future overflow (indexes are U32) */ -static void ZSTD_reduceIndex(ZSTD_CCtx *zc, const U32 reducerValue) -{ - { - U32 const hSize = 1 << zc->params.cParams.hashLog; - ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); - } - - { - U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog); - ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); - } - - { - U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0; - ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); - } -} - -/*-******************************************************* -* Block entropic compression -*********************************************************/ - -/* See doc/zstd_compression_format.md for detailed format description */ - -size_t ZSTD_noCompressBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - if (srcSize + ZSTD_blockHeaderSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memcpy((BYTE *)dst + ZSTD_blockHeaderSize, src, srcSize); - ZSTD_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw); - return ZSTD_blockHeaderSize + srcSize; -} - -static size_t ZSTD_noCompressLiterals(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - BYTE *const ostart = (BYTE * const)dst; - U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095); - - if (srcSize + flSize > dstCapacity) - return ERROR(dstSize_tooSmall); - - switch (flSize) { - case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_basic + (srcSize << 3)); break; - case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_basic + (1 << 2) + (srcSize << 4))); break; - default: /*note : should not be necessary : flSize is within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_basic + (3 << 2) + (srcSize << 4))); break; - } - - memcpy(ostart + flSize, src, srcSize); - return srcSize + flSize; -} - -static size_t ZSTD_compressRleLiteralsBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - BYTE *const ostart = (BYTE * const)dst; - U32 const flSize = 1 + (srcSize > 31) + (srcSize > 4095); - - (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ - - switch (flSize) { - case 1: /* 2 - 1 - 5 */ ostart[0] = (BYTE)((U32)set_rle + (srcSize << 3)); break; - case 2: /* 2 - 2 - 12 */ ZSTD_writeLE16(ostart, (U16)((U32)set_rle + (1 << 2) + (srcSize << 4))); break; - default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */ - case 3: /* 2 - 2 - 20 */ ZSTD_writeLE32(ostart, (U32)((U32)set_rle + (3 << 2) + (srcSize << 4))); break; - } - - ostart[flSize] = *(const BYTE *)src; - return flSize + 1; -} - -static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; } - -static size_t ZSTD_compressLiterals(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const minGain = ZSTD_minGain(srcSize); - size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); - BYTE *const ostart = (BYTE *)dst; - U32 singleStream = srcSize < 256; - symbolEncodingType_e hType = set_compressed; - size_t cLitSize; - -/* small ? don't even attempt compression (speed opt) */ -#define LITERAL_NOENTROPY 63 - { - size_t const minLitSize = zc->flagStaticHufTable == HUF_repeat_valid ? 6 : LITERAL_NOENTROPY; - if (srcSize <= minLitSize) - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - - if (dstCapacity < lhSize + 1) - return ERROR(dstSize_tooSmall); /* not enough space for compression */ - { - HUF_repeat repeat = zc->flagStaticHufTable; - int const preferRepeat = zc->params.cParams.strategy < ZSTD_lazy ? srcSize <= 1024 : 0; - if (repeat == HUF_repeat_valid && lhSize == 3) - singleStream = 1; - cLitSize = singleStream ? HUF_compress1X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters, - sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat) - : HUF_compress4X_repeat(ostart + lhSize, dstCapacity - lhSize, src, srcSize, 255, 11, zc->tmpCounters, - sizeof(zc->tmpCounters), zc->hufTable, &repeat, preferRepeat); - if (repeat != HUF_repeat_none) { - hType = set_repeat; - } /* reused the existing table */ - else { - zc->flagStaticHufTable = HUF_repeat_check; - } /* now have a table to reuse */ - } - - if ((cLitSize == 0) | (cLitSize >= srcSize - minGain)) { - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); - } - if (cLitSize == 1) { - zc->flagStaticHufTable = HUF_repeat_none; - return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); - } - - /* Build header */ - switch (lhSize) { - case 3: /* 2 - 2 - 10 - 10 */ - { - U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 14); - ZSTD_writeLE24(ostart, lhc); - break; - } - case 4: /* 2 - 2 - 14 - 14 */ - { - U32 const lhc = hType + (2 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 18); - ZSTD_writeLE32(ostart, lhc); - break; - } - default: /* should not be necessary, lhSize is only {3,4,5} */ - case 5: /* 2 - 2 - 18 - 18 */ - { - U32 const lhc = hType + (3 << 2) + ((U32)srcSize << 4) + ((U32)cLitSize << 22); - ZSTD_writeLE32(ostart, lhc); - ostart[4] = (BYTE)(cLitSize >> 10); - break; - } - } - return lhSize + cLitSize; -} - -static const BYTE LL_Code[64] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 16, 17, 17, 18, 18, - 19, 19, 20, 20, 20, 20, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, - 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24}; - -static const BYTE ML_Code[128] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, 28, 29, 30, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, - 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, - 40, 40, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 42, 42, 42, - 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42}; - -void ZSTD_seqToCodes(const seqStore_t *seqStorePtr) -{ - BYTE const LL_deltaCode = 19; - BYTE const ML_deltaCode = 36; - const seqDef *const sequences = seqStorePtr->sequencesStart; - BYTE *const llCodeTable = seqStorePtr->llCode; - BYTE *const ofCodeTable = seqStorePtr->ofCode; - BYTE *const mlCodeTable = seqStorePtr->mlCode; - U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - U32 u; - for (u = 0; u < nbSeq; u++) { - U32 const llv = sequences[u].litLength; - U32 const mlv = sequences[u].matchLength; - llCodeTable[u] = (llv > 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv]; - ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset); - mlCodeTable[u] = (mlv > 127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv]; - } - if (seqStorePtr->longLengthID == 1) - llCodeTable[seqStorePtr->longLengthPos] = MaxLL; - if (seqStorePtr->longLengthID == 2) - mlCodeTable[seqStorePtr->longLengthPos] = MaxML; -} - -ZSTD_STATIC size_t ZSTD_compressSequences_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity) -{ - const int longOffsets = zc->params.cParams.windowLog > STREAM_ACCUMULATOR_MIN; - const seqStore_t *seqStorePtr = &(zc->seqStore); - FSE_CTable *CTable_LitLength = zc->litlengthCTable; - FSE_CTable *CTable_OffsetBits = zc->offcodeCTable; - FSE_CTable *CTable_MatchLength = zc->matchlengthCTable; - U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ - const seqDef *const sequences = seqStorePtr->sequencesStart; - const BYTE *const ofCodeTable = seqStorePtr->ofCode; - const BYTE *const llCodeTable = seqStorePtr->llCode; - const BYTE *const mlCodeTable = seqStorePtr->mlCode; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstCapacity; - BYTE *op = ostart; - size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; - BYTE *seqHead; - - U32 *count; - S16 *norm; - U32 *workspace; - size_t workspaceSize = sizeof(zc->tmpCounters); - { - size_t spaceUsed32 = 0; - count = (U32 *)zc->tmpCounters + spaceUsed32; - spaceUsed32 += MaxSeq + 1; - norm = (S16 *)((U32 *)zc->tmpCounters + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2; - - workspace = (U32 *)zc->tmpCounters + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - } - - /* Compress literals */ - { - const BYTE *const literals = seqStorePtr->litStart; - size_t const litSize = seqStorePtr->lit - literals; - size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize); - if (ZSTD_isError(cSize)) - return cSize; - op += cSize; - } - - /* Sequences Header */ - if ((oend - op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) - return ERROR(dstSize_tooSmall); - if (nbSeq < 0x7F) - *op++ = (BYTE)nbSeq; - else if (nbSeq < LONGNBSEQ) - op[0] = (BYTE)((nbSeq >> 8) + 0x80), op[1] = (BYTE)nbSeq, op += 2; - else - op[0] = 0xFF, ZSTD_writeLE16(op + 1, (U16)(nbSeq - LONGNBSEQ)), op += 3; - if (nbSeq == 0) - return op - ostart; - - /* seqHead : flags for FSE encoding type */ - seqHead = op++; - -#define MIN_SEQ_FOR_DYNAMIC_FSE 64 -#define MAX_SEQ_FOR_STATIC_FSE 1000 - - /* convert length/distances into codes */ - ZSTD_seqToCodes(seqStorePtr); - - /* CTable for Literal Lengths */ - { - U32 max = MaxLL; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = llCodeTable[0]; - FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); - LLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - LLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, workspace, workspaceSize); - LLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max); - if (count[llCodeTable[nbSeq - 1]] > 1) { - count[llCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, workspace, workspaceSize); - LLtype = set_compressed; - } - } - - /* CTable for Offsets */ - { - U32 max = MaxOff; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = ofCodeTable[0]; - FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); - Offtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - Offtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, workspace, workspaceSize); - Offtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max); - if (count[ofCodeTable[nbSeq - 1]] > 1) { - count[ofCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, workspace, workspaceSize); - Offtype = set_compressed; - } - } - - /* CTable for MatchLengths */ - { - U32 max = MaxML; - size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); - if ((mostFrequent == nbSeq) && (nbSeq > 2)) { - *op++ = *mlCodeTable; - FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); - MLtype = set_rle; - } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { - MLtype = set_repeat; - } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog - 1)))) { - FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, workspace, workspaceSize); - MLtype = set_basic; - } else { - size_t nbSeq_1 = nbSeq; - const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max); - if (count[mlCodeTable[nbSeq - 1]] > 1) { - count[mlCodeTable[nbSeq - 1]]--; - nbSeq_1--; - } - FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max); - { - size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ - if (FSE_isError(NCountSize)) - return NCountSize; - op += NCountSize; - } - FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, workspace, workspaceSize); - MLtype = set_compressed; - } - } - - *seqHead = (BYTE)((LLtype << 6) + (Offtype << 4) + (MLtype << 2)); - zc->flagStaticTables = 0; - - /* Encoding Sequences */ - { - BIT_CStream_t blockStream; - FSE_CState_t stateMatchLength; - FSE_CState_t stateOffsetBits; - FSE_CState_t stateLitLength; - - CHECK_E(BIT_initCStream(&blockStream, op, oend - op), dstSize_tooSmall); /* not enough space remaining */ - - /* first symbols */ - FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq - 1]); - FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq - 1]); - FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq - 1]); - BIT_addBits(&blockStream, sequences[nbSeq - 1].litLength, LL_bits[llCodeTable[nbSeq - 1]]); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[nbSeq - 1].matchLength, ML_bits[mlCodeTable[nbSeq - 1]]); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); - if (longOffsets) { - U32 const ofBits = ofCodeTable[nbSeq - 1]; - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, extraBits); - BIT_flushBits(&blockStream); - } - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset >> extraBits, ofBits - extraBits); - } else { - BIT_addBits(&blockStream, sequences[nbSeq - 1].offset, ofCodeTable[nbSeq - 1]); - } - BIT_flushBits(&blockStream); - - { - size_t n; - for (n = nbSeq - 2; n < nbSeq; n--) { /* intentional underflow */ - BYTE const llCode = llCodeTable[n]; - BYTE const ofCode = ofCodeTable[n]; - BYTE const mlCode = mlCodeTable[n]; - U32 const llBits = LL_bits[llCode]; - U32 const ofBits = ofCode; /* 32b*/ /* 64b*/ - U32 const mlBits = ML_bits[mlCode]; - /* (7)*/ /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ - FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); /* (7)*/ - FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ - if (ZSTD_32bits() || (ofBits + mlBits + llBits >= 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_flushBits(&blockStream); /* (7)*/ - BIT_addBits(&blockStream, sequences[n].litLength, llBits); - if (ZSTD_32bits() && ((llBits + mlBits) > 24)) - BIT_flushBits(&blockStream); - BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); - if (ZSTD_32bits()) - BIT_flushBits(&blockStream); /* (7)*/ - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN - 1); - if (extraBits) { - BIT_addBits(&blockStream, sequences[n].offset, extraBits); - BIT_flushBits(&blockStream); /* (7)*/ - } - BIT_addBits(&blockStream, sequences[n].offset >> extraBits, ofBits - extraBits); /* 31 */ - } else { - BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ - } - BIT_flushBits(&blockStream); /* (7)*/ - } - } - - FSE_flushCState(&blockStream, &stateMatchLength); - FSE_flushCState(&blockStream, &stateOffsetBits); - FSE_flushCState(&blockStream, &stateLitLength); - - { - size_t const streamSize = BIT_closeCStream(&blockStream); - if (streamSize == 0) - return ERROR(dstSize_tooSmall); /* not enough space */ - op += streamSize; - } - } - return op - ostart; -} - -ZSTD_STATIC size_t ZSTD_compressSequences(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, size_t srcSize) -{ - size_t const cSize = ZSTD_compressSequences_internal(zc, dst, dstCapacity); - size_t const minGain = ZSTD_minGain(srcSize); - size_t const maxCSize = srcSize - minGain; - /* If the srcSize <= dstCapacity, then there is enough space to write a - * raw uncompressed block. Since we ran out of space, the block must not - * be compressible, so fall back to a raw uncompressed block. - */ - int const uncompressibleError = cSize == ERROR(dstSize_tooSmall) && srcSize <= dstCapacity; - int i; - - if (ZSTD_isError(cSize) && !uncompressibleError) - return cSize; - if (cSize >= maxCSize || uncompressibleError) { - zc->flagStaticHufTable = HUF_repeat_none; - return 0; - } - /* confirm repcodes */ - for (i = 0; i < ZSTD_REP_NUM; i++) - zc->rep[i] = zc->repToConfirm[i]; - return cSize; -} - -/*! ZSTD_storeSeq() : - Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. - `offsetCode` : distance to match, or 0 == repCode. - `matchCode` : matchLength - MINMATCH -*/ -ZSTD_STATIC void ZSTD_storeSeq(seqStore_t *seqStorePtr, size_t litLength, const void *literals, U32 offsetCode, size_t matchCode) -{ - /* copy Literals */ - ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); - seqStorePtr->lit += litLength; - - /* literal Length */ - if (litLength > 0xFFFF) { - seqStorePtr->longLengthID = 1; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].litLength = (U16)litLength; - - /* match offset */ - seqStorePtr->sequences[0].offset = offsetCode + 1; - - /* match Length */ - if (matchCode > 0xFFFF) { - seqStorePtr->longLengthID = 2; - seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); - } - seqStorePtr->sequences[0].matchLength = (U16)matchCode; - - seqStorePtr->sequences++; -} - -/*-************************************* -* Match length counter -***************************************/ -static unsigned ZSTD_NbCommonBytes(register size_t val) -{ - if (ZSTD_isLittleEndian()) { - if (ZSTD_64bits()) { - return (__builtin_ctzll((U64)val) >> 3); - } else { /* 32 bits */ - return (__builtin_ctz((U32)val) >> 3); - } - } else { /* Big Endian CPU */ - if (ZSTD_64bits()) { - return (__builtin_clzll(val) >> 3); - } else { /* 32 bits */ - return (__builtin_clz((U32)val) >> 3); - } - } -} - -static size_t ZSTD_count(const BYTE *pIn, const BYTE *pMatch, const BYTE *const pInLimit) -{ - const BYTE *const pStart = pIn; - const BYTE *const pInLoopLimit = pInLimit - (sizeof(size_t) - 1); - - while (pIn < pInLoopLimit) { - size_t const diff = ZSTD_readST(pMatch) ^ ZSTD_readST(pIn); - if (!diff) { - pIn += sizeof(size_t); - pMatch += sizeof(size_t); - continue; - } - pIn += ZSTD_NbCommonBytes(diff); - return (size_t)(pIn - pStart); - } - if (ZSTD_64bits()) - if ((pIn < (pInLimit - 3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) { - pIn += 4; - pMatch += 4; - } - if ((pIn < (pInLimit - 1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) { - pIn += 2; - pMatch += 2; - } - if ((pIn < pInLimit) && (*pMatch == *pIn)) - pIn++; - return (size_t)(pIn - pStart); -} - -/** ZSTD_count_2segments() : -* can count match length with `ip` & `match` in 2 different segments. -* convention : on reaching mEnd, match count continue starting from iStart -*/ -static size_t ZSTD_count_2segments(const BYTE *ip, const BYTE *match, const BYTE *iEnd, const BYTE *mEnd, const BYTE *iStart) -{ - const BYTE *const vEnd = MIN(ip + (mEnd - match), iEnd); - size_t const matchLength = ZSTD_count(ip, match, vEnd); - if (match + matchLength != mEnd) - return matchLength; - return matchLength + ZSTD_count(ip + matchLength, iStart, iEnd); -} - -/*-************************************* -* Hashes -***************************************/ -static const U32 prime3bytes = 506832829U; -static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32 - 24)) * prime3bytes) >> (32 - h); } -ZSTD_STATIC size_t ZSTD_hash3Ptr(const void *ptr, U32 h) { return ZSTD_hash3(ZSTD_readLE32(ptr), h); } /* only in zstd_opt.h */ - -static const U32 prime4bytes = 2654435761U; -static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32 - h); } -static size_t ZSTD_hash4Ptr(const void *ptr, U32 h) { return ZSTD_hash4(ZSTD_read32(ptr), h); } - -static const U64 prime5bytes = 889523592379ULL; -static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64 - 40)) * prime5bytes) >> (64 - h)); } -static size_t ZSTD_hash5Ptr(const void *p, U32 h) { return ZSTD_hash5(ZSTD_readLE64(p), h); } - -static const U64 prime6bytes = 227718039650203ULL; -static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64 - 48)) * prime6bytes) >> (64 - h)); } -static size_t ZSTD_hash6Ptr(const void *p, U32 h) { return ZSTD_hash6(ZSTD_readLE64(p), h); } - -static const U64 prime7bytes = 58295818150454627ULL; -static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64 - 56)) * prime7bytes) >> (64 - h)); } -static size_t ZSTD_hash7Ptr(const void *p, U32 h) { return ZSTD_hash7(ZSTD_readLE64(p), h); } - -static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; -static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u)*prime8bytes) >> (64 - h)); } -static size_t ZSTD_hash8Ptr(const void *p, U32 h) { return ZSTD_hash8(ZSTD_readLE64(p), h); } - -static size_t ZSTD_hashPtr(const void *p, U32 hBits, U32 mls) -{ - switch (mls) { - // case 3: return ZSTD_hash3Ptr(p, hBits); - default: - case 4: return ZSTD_hash4Ptr(p, hBits); - case 5: return ZSTD_hash5Ptr(p, hBits); - case 6: return ZSTD_hash6Ptr(p, hBits); - case 7: return ZSTD_hash7Ptr(p, hBits); - case 8: return ZSTD_hash8Ptr(p, hBits); - } -} - -/*-************************************* -* Fast Scan -***************************************/ -static void ZSTD_fillHashTable(ZSTD_CCtx *zc, const void *end, const U32 mls) -{ - U32 *const hashTable = zc->hashTable; - U32 const hBits = zc->params.cParams.hashLog; - const BYTE *const base = zc->base; - const BYTE *ip = base + zc->nextToUpdate; - const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while (ip <= iend) { - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_fast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashTable = cctx->hashTable; - U32 const hBits = cctx->params.cParams.hashLog; - seqStore_t *seqStorePtr = &(cctx->seqStore); - const BYTE *const base = cctx->base; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE *const lowest = base + lowestIndex; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - HASH_READ_SIZE; - U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip == lowest); - { - U32 const maxRep = (U32)(ip - lowest); - if (offset_2 > maxRep) - offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h = ZSTD_hashPtr(ip, hBits, mls); - U32 const curr = (U32)(ip - base); - U32 const matchIndex = hashTable[h]; - const BYTE *match = base + matchIndex; - hashTable[h] = curr; /* update hash table */ - - if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { - mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - U32 offset; - if ((matchIndex <= lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - mLength = ZSTD_count(ip + 4, match + 4, iend) + 4; - offset = (U32)(ip - match); - while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; /* here because curr+2 could be > iend-8 */ - hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4; - { - U32 const tmpOff = offset_2; - offset_2 = offset_1; - offset_1 = tmpOff; - } /* swap offset_2 <=> offset_1 */ - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - } - - /* save reps for next block */ - cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_fast(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return; - } -} - -static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *hashTable = ctx->hashTable; - const U32 hBits = ctx->params.cParams.hashLog; - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const base = ctx->base; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const lowPrefixPtr = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t h = ZSTD_hashPtr(ip, hBits, mls); - const U32 matchIndex = hashTable[h]; - const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE *match = matchBase + matchIndex; - const U32 curr = (U32)(ip - base); - const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ - const BYTE *repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *repMatch = repBase + repIndex; - size_t mLength; - hashTable[h] = curr; /* update hash table */ - - if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) && - (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) { - const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - if ((matchIndex < lowestIndex) || (ZSTD_read32(match) != ZSTD_read32(ip))) { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - { - const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32; - while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - offset = curr - matchIndex; - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashTable[ZSTD_hashPtr(base + curr + 2, hBits, mls)] = curr + 2; - hashTable[ZSTD_hashPtr(ip - 2, hBits, mls)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const curr2 = (U32)(ip - base); - U32 const repIndex2 = curr2 - offset_2; - const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) { - const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t repLength2 = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; - offset_2 = offset_1; - offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH); - hashTable[ZSTD_hashPtr(ip, hBits, mls)] = curr2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - U32 const mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - -/*-************************************* -* Double Fast -***************************************/ -static void ZSTD_fillDoubleHashTable(ZSTD_CCtx *cctx, const void *end, const U32 mls) -{ - U32 *const hashLarge = cctx->hashTable; - U32 const hBitsL = cctx->params.cParams.hashLog; - U32 *const hashSmall = cctx->chainTable; - U32 const hBitsS = cctx->params.cParams.chainLog; - const BYTE *const base = cctx->base; - const BYTE *ip = base + cctx->nextToUpdate; - const BYTE *const iend = ((const BYTE *)end) - HASH_READ_SIZE; - const size_t fastHashFillStep = 3; - - while (ip <= iend) { - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - ip += fastHashFillStep; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx *cctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashLong = cctx->hashTable; - const U32 hBitsL = cctx->params.cParams.hashLog; - U32 *const hashSmall = cctx->chainTable; - const U32 hBitsS = cctx->params.cParams.chainLog; - seqStore_t *seqStorePtr = &(cctx->seqStore); - const BYTE *const base = cctx->base; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = cctx->dictLimit; - const BYTE *const lowest = base + lowestIndex; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - HASH_READ_SIZE; - U32 offset_1 = cctx->rep[0], offset_2 = cctx->rep[1]; - U32 offsetSaved = 0; - - /* init */ - ip += (ip == lowest); - { - U32 const maxRep = (U32)(ip - lowest); - if (offset_2 > maxRep) - offsetSaved = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - offsetSaved = offset_1, offset_1 = 0; - } - - /* Main Search Loop */ - while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ - size_t mLength; - size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); - size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); - U32 const curr = (U32)(ip - base); - U32 const matchIndexL = hashLong[h2]; - U32 const matchIndexS = hashSmall[h]; - const BYTE *matchLong = base + matchIndexL; - const BYTE *match = base + matchIndexS; - hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ - - if ((offset_1 > 0) & (ZSTD_read32(ip + 1 - offset_1) == ZSTD_read32(ip + 1))) { /* note : by construction, offset_1 <= curr */ - mLength = ZSTD_count(ip + 1 + 4, ip + 1 + 4 - offset_1, iend) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - U32 offset; - if ((matchIndexL > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) { - mLength = ZSTD_count(ip + 8, matchLong + 8, iend) + 8; - offset = (U32)(ip - matchLong); - while (((ip > anchor) & (matchLong > lowest)) && (ip[-1] == matchLong[-1])) { - ip--; - matchLong--; - mLength++; - } /* catch up */ - } else if ((matchIndexS > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE *match3 = base + matchIndex3; - hashLong[h3] = curr + 1; - if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) { - mLength = ZSTD_count(ip + 9, match3 + 8, iend) + 8; - ip++; - offset = (U32)(ip - match3); - while (((ip > anchor) & (match3 > lowest)) && (ip[-1] == match3[-1])) { - ip--; - match3--; - mLength++; - } /* catch up */ - } else { - mLength = ZSTD_count(ip + 4, match + 4, iend) + 4; - offset = (U32)(ip - match); - while (((ip > anchor) & (match > lowest)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - } - } else { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - - offset_2 = offset_1; - offset_1 = offset; - - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - } - - /* match found */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = - curr + 2; /* here because curr+2 could be > iend-8 */ - hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base); - - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - size_t const rLength = ZSTD_count(ip + 4, ip + 4 - offset_2, iend) + 4; - { - U32 const tmpOff = offset_2; - offset_2 = offset_1; - offset_1 = tmpOff; - } /* swap offset_2 <=> offset_1 */ - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base); - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base); - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength - MINMATCH); - ip += rLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - } - - /* save reps for next block */ - cctx->repToConfirm[0] = offset_1 ? offset_1 : offsetSaved; - cctx->repToConfirm[1] = offset_2 ? offset_2 : offsetSaved; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - const U32 mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return; - } -} - -static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 mls) -{ - U32 *const hashLong = ctx->hashTable; - U32 const hBitsL = ctx->params.cParams.hashLog; - U32 *const hashSmall = ctx->chainTable; - U32 const hBitsS = ctx->params.cParams.chainLog; - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const base = ctx->base; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const dictStart = dictBase + lowestIndex; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const lowPrefixPtr = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* Search Loop */ - while (ip < ilimit) { /* < instead of <=, because (ip+1) */ - const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); - const U32 matchIndex = hashSmall[hSmall]; - const BYTE *matchBase = matchIndex < dictLimit ? dictBase : base; - const BYTE *match = matchBase + matchIndex; - - const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); - const U32 matchLongIndex = hashLong[hLong]; - const BYTE *matchLongBase = matchLongIndex < dictLimit ? dictBase : base; - const BYTE *matchLong = matchLongBase + matchLongIndex; - - const U32 curr = (U32)(ip - base); - const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ - const BYTE *repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *repMatch = repBase + repIndex; - size_t mLength; - hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ - - if ((((U32)((dictLimit - 1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex)) && - (ZSTD_read32(repMatch) == ZSTD_read32(ip + 1))) { - const BYTE *repMatchEnd = repIndex < dictLimit ? dictEnd : iend; - mLength = ZSTD_count_2segments(ip + 1 + 4, repMatch + 4, iend, repMatchEnd, lowPrefixPtr) + 4; - ip++; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, 0, mLength - MINMATCH); - } else { - if ((matchLongIndex > lowestIndex) && (ZSTD_read64(matchLong) == ZSTD_read64(ip))) { - const BYTE *matchEnd = matchLongIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr; - U32 offset; - mLength = ZSTD_count_2segments(ip + 8, matchLong + 8, iend, matchEnd, lowPrefixPtr) + 8; - offset = curr - matchLongIndex; - while (((ip > anchor) & (matchLong > lowMatchPtr)) && (ip[-1] == matchLong[-1])) { - ip--; - matchLong--; - mLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - - } else if ((matchIndex > lowestIndex) && (ZSTD_read32(match) == ZSTD_read32(ip))) { - size_t const h3 = ZSTD_hashPtr(ip + 1, hBitsL, 8); - U32 const matchIndex3 = hashLong[h3]; - const BYTE *const match3Base = matchIndex3 < dictLimit ? dictBase : base; - const BYTE *match3 = match3Base + matchIndex3; - U32 offset; - hashLong[h3] = curr + 1; - if ((matchIndex3 > lowestIndex) && (ZSTD_read64(match3) == ZSTD_read64(ip + 1))) { - const BYTE *matchEnd = matchIndex3 < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip + 9, match3 + 8, iend, matchEnd, lowPrefixPtr) + 8; - ip++; - offset = curr + 1 - matchIndex3; - while (((ip > anchor) & (match3 > lowMatchPtr)) && (ip[-1] == match3[-1])) { - ip--; - match3--; - mLength++; - } /* catch up */ - } else { - const BYTE *matchEnd = matchIndex < dictLimit ? dictEnd : iend; - const BYTE *lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr; - mLength = ZSTD_count_2segments(ip + 4, match + 4, iend, matchEnd, lowPrefixPtr) + 4; - offset = curr - matchIndex; - while (((ip > anchor) & (match > lowMatchPtr)) && (ip[-1] == match[-1])) { - ip--; - match--; - mLength++; - } /* catch up */ - } - offset_2 = offset_1; - offset_1 = offset; - ZSTD_storeSeq(seqStorePtr, ip - anchor, anchor, offset + ZSTD_REP_MOVE, mLength - MINMATCH); - - } else { - ip += ((ip - anchor) >> g_searchStrength) + 1; - continue; - } - } - - /* found a match : store it */ - ip += mLength; - anchor = ip; - - if (ip <= ilimit) { - /* Fill Table */ - hashSmall[ZSTD_hashPtr(base + curr + 2, hBitsS, mls)] = curr + 2; - hashLong[ZSTD_hashPtr(base + curr + 2, hBitsL, 8)] = curr + 2; - hashSmall[ZSTD_hashPtr(ip - 2, hBitsS, mls)] = (U32)(ip - 2 - base); - hashLong[ZSTD_hashPtr(ip - 2, hBitsL, 8)] = (U32)(ip - 2 - base); - /* check immediate repcode */ - while (ip <= ilimit) { - U32 const curr2 = (U32)(ip - base); - U32 const repIndex2 = curr2 - offset_2; - const BYTE *repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2; - if ((((U32)((dictLimit - 1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */ - && (ZSTD_read32(repMatch2) == ZSTD_read32(ip))) { - const BYTE *const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend; - size_t const repLength2 = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch2 + EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32; - U32 tmpOffset = offset_2; - offset_2 = offset_1; - offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2 - MINMATCH); - hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = curr2; - hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = curr2; - ip += repLength2; - anchor = ip; - continue; - } - break; - } - } - } - - /* save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - U32 const mls = ctx->params.cParams.searchLength; - switch (mls) { - default: /* includes case 3 */ - case 4: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return; - case 5: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return; - case 6: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return; - case 7: ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return; - } -} - -/*-************************************* -* Binary Tree search -***************************************/ -/** ZSTD_insertBt1() : add one or multiple positions to tree. -* ip : assumed <= iend-8 . -* @return : nb of positions added */ -static U32 ZSTD_insertBt1(ZSTD_CCtx *zc, const BYTE *const ip, const U32 mls, const BYTE *const iend, U32 nbCompares, U32 extDict) -{ - U32 *const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *match; - const U32 curr = (U32)(ip - base); - const U32 btLow = btMask >= curr ? 0 : curr - btMask; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = smallerPtr + 1; - U32 dummy32; /* to be nullified at the end */ - U32 const windowLow = zc->lowLimit; - U32 matchEndIdx = curr + 8; - size_t bestLength = 8; - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *const nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - bestLength = matchLength; - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - } - - if (ip + matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt the tree */ - - if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */ - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - if (bestLength > 384) - return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ - if (matchEndIdx > curr + 8) - return matchEndIdx - curr - 8; - return 1; -} - -static size_t ZSTD_insertBtAndFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, size_t *offsetPtr, U32 nbCompares, const U32 mls, - U32 extDict) -{ - U32 *const hashTable = zc->hashTable; - U32 const hashLog = zc->params.cParams.hashLog; - size_t const h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const bt = zc->chainTable; - U32 const btLog = zc->params.cParams.chainLog - 1; - U32 const btMask = (1 << btLog) - 1; - U32 matchIndex = hashTable[h]; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const U32 curr = (U32)(ip - base); - const U32 btLow = btMask >= curr ? 0 : curr - btMask; - const U32 windowLow = zc->lowLimit; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = bt + 2 * (curr & btMask) + 1; - U32 matchEndIdx = curr + 8; - U32 dummy32; /* to be nullified at the end */ - size_t bestLength = 0; - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *const nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE *match; - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iend) + 1; - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iend, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - if ((4 * (int)(matchLength - bestLength)) > (int)(ZSTD_highbit32(curr - matchIndex + 1) - ZSTD_highbit32((U32)offsetPtr[0] + 1))) - bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; - if (ip + matchLength == iend) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - - zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1; - return bestLength; -} - -static void ZSTD_updateTree(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) - idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 0); -} - -/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0); -} - -static size_t ZSTD_BtFindBestMatch_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7: - case 6: return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - -static void ZSTD_updateTree_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iend, const U32 nbCompares, const U32 mls) -{ - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) - idx += ZSTD_insertBt1(zc, base + idx, mls, iend, nbCompares, 1); -} - -/** Tree updater, providing best match */ -static size_t ZSTD_BtFindBestMatch_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 mls) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1); -} - -static size_t ZSTD_BtFindBestMatch_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4); - case 5: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5); - case 7: - case 6: return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6); - } -} - -/* ********************************* -* Hash Chain -***********************************/ -#define NEXT_IN_CHAIN(d, mask) chainTable[(d)&mask] - -/* Update chains up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndex(ZSTD_CCtx *zc, const BYTE *ip, U32 mls) -{ - U32 *const hashTable = zc->hashTable; - const U32 hashLog = zc->params.cParams.hashLog; - U32 *const chainTable = zc->chainTable; - const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1; - const BYTE *const base = zc->base; - const U32 target = (U32)(ip - base); - U32 idx = zc->nextToUpdate; - - while (idx < target) { /* catch up */ - size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); - NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; - hashTable[h] = idx; - idx++; - } - - zc->nextToUpdate = target; - return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; -} - -/* inlining is important to hardwire a hot branch (template emulation) */ -FORCE_INLINE -size_t ZSTD_HcFindBestMatch_generic(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *const ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, const U32 mls, - const U32 extDict) -{ - U32 *const chainTable = zc->chainTable; - const U32 chainSize = (1 << zc->params.cParams.chainLog); - const U32 chainMask = chainSize - 1; - const BYTE *const base = zc->base; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const U32 lowLimit = zc->lowLimit; - const U32 curr = (U32)(ip - base); - const U32 minChain = curr > chainSize ? curr - chainSize : 0; - int nbAttempts = maxNbAttempts; - size_t ml = EQUAL_READ32 - 1; - - /* HC4 match finder */ - U32 matchIndex = ZSTD_insertAndFindFirstIndex(zc, ip, mls); - - for (; (matchIndex > lowLimit) & (nbAttempts > 0); nbAttempts--) { - const BYTE *match; - size_t currMl = 0; - if ((!extDict) || matchIndex >= dictLimit) { - match = base + matchIndex; - if (match[ml] == ip[ml]) /* potentially better */ - currMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex; - if (ZSTD_read32(match) == ZSTD_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ - currMl = ZSTD_count_2segments(ip + EQUAL_READ32, match + EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32; - } - - /* save best solution */ - if (currMl > ml) { - ml = currMl; - *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; - if (ip + currMl == iLimit) - break; /* best possible, and avoid read overflow*/ - } - - if (matchIndex <= minChain) - break; - matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); - } - - return ml; -} - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0); - case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0); - case 7: - case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0); - } -} - -FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS(ZSTD_CCtx *zc, const BYTE *ip, const BYTE *const iLimit, size_t *offsetPtr, const U32 maxNbAttempts, - const U32 matchLengthSearch) -{ - switch (matchLengthSearch) { - default: /* includes case 3 */ - case 4: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1); - case 5: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1); - case 7: - case 6: return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1); - } -} - -/* ******************************* -* Common parser - lazy strategy -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base + ctx->dictLimit; - - U32 const maxSearches = 1 << ctx->params.cParams.searchLog; - U32 const mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS; - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset = 0; - - /* init */ - ip += (ip == base); - ctx->nextToUpdate3 = ctx->nextToUpdate; - { - U32 const maxRep = (U32)(ip - base); - if (offset_2 > maxRep) - savedOffset = offset_2, offset_2 = 0; - if (offset_1 > maxRep) - savedOffset = offset_1, offset_1 = 0; - } - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength = 0; - size_t offset = 0; - const BYTE *start = ip + 1; - - /* check repCode */ - if ((offset_1 > 0) & (ZSTD_read32(ip + 1) == ZSTD_read32(ip + 1 - offset_1))) { - /* repcode : we take it */ - matchLength = ZSTD_count(ip + 1 + EQUAL_READ32, ip + 1 + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - if (depth == 0) - goto _storeSequence; - } - - /* first search (depth 0) */ - { - size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset = offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth >= 1) - while (ip < ilimit) { - ip++; - if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) { - size_t const mlRep = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(mlRep * 3); - int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((mlRep >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = mlRep, offset = 0, start = ip; - } - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - /* let's find an even better one */ - if ((depth == 2) && (ip < ilimit)) { - ip++; - if ((offset) && ((offset_1 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_1)))) { - size_t const ml2 = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_1, iend) + EQUAL_READ32; - int const gain2 = (int)(ml2 * 4); - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = ml2, offset = 0, start = ip; - } - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } - } - } - break; /* nothing found : store previous solution */ - } - - /* NOTE: - * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. - * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which - * overflows the pointer, which is undefined behavior. - */ - /* catch up */ - if (offset) { - while ((start > anchor) && (start > base + offset - ZSTD_REP_MOVE) && - (start[-1] == (start-offset+ZSTD_REP_MOVE)[-1])) /* only search for offset within prefix */ - { - start--; - matchLength++; - } - offset_2 = offset_1; - offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ -_storeSequence: - { - size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while ((ip <= ilimit) && ((offset_2 > 0) & (ZSTD_read32(ip) == ZSTD_read32(ip - offset_2)))) { - /* store sequence */ - matchLength = ZSTD_count(ip + EQUAL_READ32, ip + EQUAL_READ32 - offset_2, iend) + EQUAL_READ32; - offset = offset_2; - offset_2 = offset_1; - offset_1 = (U32)offset; /* swap repcodes */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - } - - /* Save reps for next block */ - ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset; - ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); } - -static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); } - -static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); } - -static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); } - -FORCE_INLINE -void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const U32 dictLimit = ctx->dictLimit; - const U32 lowestIndex = ctx->lowLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const dictStart = dictBase + ctx->lowLimit; - - const U32 maxSearches = 1 << ctx->params.cParams.searchLog; - const U32 mls = ctx->params.cParams.searchLength; - - typedef size_t (*searchMax_f)(ZSTD_CCtx * zc, const BYTE *ip, const BYTE *iLimit, size_t *offsetPtr, U32 maxNbAttempts, U32 matchLengthSearch); - searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS; - - U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - size_t matchLength = 0; - size_t offset = 0; - const BYTE *start = ip + 1; - U32 curr = (U32)(ip - base); - - /* check repCode */ - { - const U32 repIndex = (U32)(curr + 1 - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip + 1) == ZSTD_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = - ZSTD_count_2segments(ip + 1 + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - if (depth == 0) - goto _storeSequence; - } - } - - /* first search (depth 0) */ - { - size_t offsetFound = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls); - if (ml2 > matchLength) - matchLength = ml2, start = ip, offset = offsetFound; - } - - if (matchLength < EQUAL_READ32) { - ip += ((ip - anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */ - continue; - } - - /* let's try to find a better solution */ - if (depth >= 1) - while (ip < ilimit) { - ip++; - curr++; - /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(curr - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t const repLength = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + - EQUAL_READ32; - int const gain2 = (int)(repLength * 3); - int const gain1 = (int)(matchLength * 3 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } - } - - /* search match, depth 1 */ - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 4); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; /* search a better one */ - } - } - - /* let's find an even better one */ - if ((depth == 2) && (ip < ilimit)) { - ip++; - curr++; - /* check repCode */ - if (offset) { - const U32 repIndex = (U32)(curr - offset_1); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - size_t repLength = ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, - repEnd, prefixStart) + - EQUAL_READ32; - int gain2 = (int)(repLength * 4); - int gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 1); - if ((repLength >= EQUAL_READ32) && (gain2 > gain1)) - matchLength = repLength, offset = 0, start = ip; - } - } - - /* search match, depth 2 */ - { - size_t offset2 = 99999999; - size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls); - int const gain2 = (int)(ml2 * 4 - ZSTD_highbit32((U32)offset2 + 1)); /* raw approx */ - int const gain1 = (int)(matchLength * 4 - ZSTD_highbit32((U32)offset + 1) + 7); - if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) { - matchLength = ml2, offset = offset2, start = ip; - continue; - } - } - } - break; /* nothing found : store previous solution */ - } - - /* catch up */ - if (offset) { - U32 const matchIndex = (U32)((start - base) - (offset - ZSTD_REP_MOVE)); - const BYTE *match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; - const BYTE *const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; - while ((start > anchor) && (match > mStart) && (start[-1] == match[-1])) { - start--; - match--; - matchLength++; - } /* catch up */ - offset_2 = offset_1; - offset_1 = (U32)(offset - ZSTD_REP_MOVE); - } - - /* store sequence */ - _storeSequence : { - size_t const litLength = start - anchor; - ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength - MINMATCH); - anchor = ip = start + matchLength; - } - - /* check immediate repcode */ - while (ip <= ilimit) { - const U32 repIndex = (U32)((ip - base) - offset_2); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - if (ZSTD_read32(ip) == ZSTD_read32(repMatch)) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - matchLength = - ZSTD_count_2segments(ip + EQUAL_READ32, repMatch + EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32; - offset = offset_2; - offset_2 = offset_1; - offset_1 = (U32)offset; /* swap offset history */ - ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength - MINMATCH); - ip += matchLength; - anchor = ip; - continue; /* faster when present ... (?) */ - } - break; - } - } - - /* Save reps for next block */ - ctx->repToConfirm[0] = offset_1; - ctx->repToConfirm[1] = offset_2; - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0); } - -static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1); -} - -static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2); -} - -static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ - ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2); -} - -/* The optimal parser */ -#include "zstd_opt.h" - -static void ZSTD_compressBlock_btopt(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx *ctx, const void *src, size_t srcSize) -{ -#ifdef ZSTD_OPT_H_91842398743 - ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1); -#else - (void)ctx; - (void)src; - (void)srcSize; - return; -#endif -} - -typedef void (*ZSTD_blockCompressor)(ZSTD_CCtx *ctx, const void *src, size_t srcSize); - -static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict) -{ - static const ZSTD_blockCompressor blockCompressor[2][8] = { - {ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, - ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2}, - {ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict, - ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict}}; - - return blockCompressor[extDict][(U32)strat]; -} - -static size_t ZSTD_compressBlock_internal(ZSTD_CCtx *zc, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit); - const BYTE *const base = zc->base; - const BYTE *const istart = (const BYTE *)src; - const U32 curr = (U32)(istart - base); - if (srcSize < MIN_CBLOCK_SIZE + ZSTD_blockHeaderSize + 1) - return 0; /* don't even attempt compression below a certain srcSize */ - ZSTD_resetSeqStore(&(zc->seqStore)); - if (curr > zc->nextToUpdate + 384) - zc->nextToUpdate = curr - MIN(192, (U32)(curr - zc->nextToUpdate - 384)); /* update tree not updated after finding very long rep matches */ - blockCompressor(zc, src, srcSize); - return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize); -} - -/*! ZSTD_compress_generic() : -* Compress a chunk of data into one or multiple blocks. -* All blocks will be terminated, all input will be consumed. -* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. -* Frame is supposed already started (header already produced) -* @return : compressed size, or an error code -*/ -static size_t ZSTD_compress_generic(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 lastFrameChunk) -{ - size_t blockSize = cctx->blockSize; - size_t remaining = srcSize; - const BYTE *ip = (const BYTE *)src; - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - U32 const maxDist = 1 << cctx->params.cParams.windowLog; - - if (cctx->params.fParams.checksumFlag && srcSize) - xxh64_update(&cctx->xxhState, src, srcSize); - - while (remaining) { - U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); - size_t cSize; - - if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) - return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ - if (remaining < blockSize) - blockSize = remaining; - - /* preemptive overflow correction */ - if (cctx->lowLimit > (3U << 29)) { - U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1; - U32 const curr = (U32)(ip - cctx->base); - U32 const newCurr = (curr & cycleMask) + (1 << cctx->params.cParams.windowLog); - U32 const correction = curr - newCurr; - ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30); - ZSTD_reduceIndex(cctx, correction); - cctx->base += correction; - cctx->dictBase += correction; - cctx->lowLimit -= correction; - cctx->dictLimit -= correction; - if (cctx->nextToUpdate < correction) - cctx->nextToUpdate = 0; - else - cctx->nextToUpdate -= correction; - } - - if ((U32)(ip + blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) { - /* enforce maxDist */ - U32 const newLowLimit = (U32)(ip + blockSize - cctx->base) - maxDist; - if (cctx->lowLimit < newLowLimit) - cctx->lowLimit = newLowLimit; - if (cctx->dictLimit < cctx->lowLimit) - cctx->dictLimit = cctx->lowLimit; - } - - cSize = ZSTD_compressBlock_internal(cctx, op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize, ip, blockSize); - if (ZSTD_isError(cSize)) - return cSize; - - if (cSize == 0) { /* block is not compressible */ - U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw) << 1) + (U32)(blockSize << 3); - if (blockSize + ZSTD_blockHeaderSize > dstCapacity) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, cBlockHeader24); /* no pb, 4th byte will be overwritten */ - memcpy(op + ZSTD_blockHeaderSize, ip, blockSize); - cSize = ZSTD_blockHeaderSize + blockSize; - } else { - U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed) << 1) + (U32)(cSize << 3); - ZSTD_writeLE24(op, cBlockHeader24); - cSize += ZSTD_blockHeaderSize; - } - - remaining -= blockSize; - dstCapacity -= cSize; - ip += blockSize; - op += cSize; - } - - if (lastFrameChunk && (op > ostart)) - cctx->stage = ZSTDcs_ending; - return op - ostart; -} - -static size_t ZSTD_writeFrameHeader(void *dst, size_t dstCapacity, ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID) -{ - BYTE *const op = (BYTE *)dst; - U32 const dictIDSizeCode = (dictID > 0) + (dictID >= 256) + (dictID >= 65536); /* 0-3 */ - U32 const checksumFlag = params.fParams.checksumFlag > 0; - U32 const windowSize = 1U << params.cParams.windowLog; - U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); - BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); - U32 const fcsCode = - params.fParams.contentSizeFlag ? (pledgedSrcSize >= 256) + (pledgedSrcSize >= 65536 + 256) + (pledgedSrcSize >= 0xFFFFFFFFU) : 0; /* 0-3 */ - BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag << 2) + (singleSegment << 5) + (fcsCode << 6)); - size_t pos; - - if (dstCapacity < ZSTD_frameHeaderSize_max) - return ERROR(dstSize_tooSmall); - - ZSTD_writeLE32(dst, ZSTD_MAGICNUMBER); - op[4] = frameHeaderDecriptionByte; - pos = 5; - if (!singleSegment) - op[pos++] = windowLogByte; - switch (dictIDSizeCode) { - default: /* impossible */ - case 0: break; - case 1: - op[pos] = (BYTE)(dictID); - pos++; - break; - case 2: - ZSTD_writeLE16(op + pos, (U16)dictID); - pos += 2; - break; - case 3: - ZSTD_writeLE32(op + pos, dictID); - pos += 4; - break; - } - switch (fcsCode) { - default: /* impossible */ - case 0: - if (singleSegment) - op[pos++] = (BYTE)(pledgedSrcSize); - break; - case 1: - ZSTD_writeLE16(op + pos, (U16)(pledgedSrcSize - 256)); - pos += 2; - break; - case 2: - ZSTD_writeLE32(op + pos, (U32)(pledgedSrcSize)); - pos += 4; - break; - case 3: - ZSTD_writeLE64(op + pos, (U64)(pledgedSrcSize)); - pos += 8; - break; - } - return pos; -} - -static size_t ZSTD_compressContinue_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, U32 frame, U32 lastFrameChunk) -{ - const BYTE *const ip = (const BYTE *)src; - size_t fhSize = 0; - - if (cctx->stage == ZSTDcs_created) - return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ - - if (frame && (cctx->stage == ZSTDcs_init)) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID); - if (ZSTD_isError(fhSize)) - return fhSize; - dstCapacity -= fhSize; - dst = (char *)dst + fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - /* Check if blocks follow each other */ - if (src != cctx->nextSrc) { - /* not contiguous */ - ptrdiff_t const delta = cctx->nextSrc - ip; - cctx->lowLimit = cctx->dictLimit; - cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base); - cctx->dictBase = cctx->base; - cctx->base -= delta; - cctx->nextToUpdate = cctx->dictLimit; - if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) - cctx->lowLimit = cctx->dictLimit; /* too small extDict */ - } - - /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ - if ((ip + srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) { - ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase; - U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx; - cctx->lowLimit = lowLimitMax; - } - - cctx->nextSrc = ip + srcSize; - - if (srcSize) { - size_t const cSize = frame ? ZSTD_compress_generic(cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) - : ZSTD_compressBlock_internal(cctx, dst, dstCapacity, src, srcSize); - if (ZSTD_isError(cSize)) - return cSize; - return cSize + fhSize; - } else - return fhSize; -} - -size_t ZSTD_compressContinue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0); -} - -size_t ZSTD_getBlockSizeMax(ZSTD_CCtx *cctx) { return MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog); } - -size_t ZSTD_compressBlock(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx); - if (srcSize > blockSizeMax) - return ERROR(srcSize_wrong); - return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0); -} - -/*! ZSTD_loadDictionaryContent() : - * @return : 0, or an error code - */ -static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx *zc, const void *src, size_t srcSize) -{ - const BYTE *const ip = (const BYTE *)src; - const BYTE *const iend = ip + srcSize; - - /* input becomes curr prefix */ - zc->lowLimit = zc->dictLimit; - zc->dictLimit = (U32)(zc->nextSrc - zc->base); - zc->dictBase = zc->base; - zc->base += ip - zc->nextSrc; - zc->nextToUpdate = zc->dictLimit; - zc->loadedDictEnd = zc->forceWindow ? 0 : (U32)(iend - zc->base); - - zc->nextSrc = iend; - if (srcSize <= HASH_READ_SIZE) - return 0; - - switch (zc->params.cParams.strategy) { - case ZSTD_fast: ZSTD_fillHashTable(zc, iend, zc->params.cParams.searchLength); break; - - case ZSTD_dfast: ZSTD_fillDoubleHashTable(zc, iend, zc->params.cParams.searchLength); break; - - case ZSTD_greedy: - case ZSTD_lazy: - case ZSTD_lazy2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_insertAndFindFirstIndex(zc, iend - HASH_READ_SIZE, zc->params.cParams.searchLength); - break; - - case ZSTD_btlazy2: - case ZSTD_btopt: - case ZSTD_btopt2: - if (srcSize >= HASH_READ_SIZE) - ZSTD_updateTree(zc, iend - HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength); - break; - - default: - return ERROR(GENERIC); /* strategy doesn't exist; impossible */ - } - - zc->nextToUpdate = (U32)(iend - zc->base); - return 0; -} - -/* Dictionaries that assign zero probability to symbols that show up causes problems - when FSE encoding. Refuse dictionaries that assign zero probability to symbols - that we may encounter during compression. - NOTE: This behavior is not standard and could be improved in the future. */ -static size_t ZSTD_checkDictNCount(short *normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) -{ - U32 s; - if (dictMaxSymbolValue < maxSymbolValue) - return ERROR(dictionary_corrupted); - for (s = 0; s <= maxSymbolValue; ++s) { - if (normalizedCounter[s] == 0) - return ERROR(dictionary_corrupted); - } - return 0; -} - -/* Dictionary format : - * See : - * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#dictionary-format - */ -/*! ZSTD_loadZstdDictionary() : - * @return : 0, or an error code - * assumptions : magic number supposed already checked - * dictSize supposed > 8 - */ -static size_t ZSTD_loadZstdDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize) -{ - const BYTE *dictPtr = (const BYTE *)dict; - const BYTE *const dictEnd = dictPtr + dictSize; - short offcodeNCount[MaxOff + 1]; - unsigned offcodeMaxValue = MaxOff; - - dictPtr += 4; /* skip magic number */ - cctx->dictID = cctx->params.fParams.noDictIDFlag ? 0 : ZSTD_readLE32(dictPtr); - dictPtr += 4; - - { - size_t const hufHeaderSize = HUF_readCTable_wksp(cctx->hufTable, 255, dictPtr, dictEnd - dictPtr, cctx->tmpCounters, sizeof(cctx->tmpCounters)); - if (HUF_isError(hufHeaderSize)) - return ERROR(dictionary_corrupted); - dictPtr += hufHeaderSize; - } - - { - unsigned offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(offcodeHeaderSize)) - return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) - return ERROR(dictionary_corrupted); - /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ - CHECK_E(FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += offcodeHeaderSize; - } - - { - short matchlengthNCount[MaxML + 1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(matchlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) - return ERROR(dictionary_corrupted); - /* Every match length code must have non-zero probability */ - CHECK_F(ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); - CHECK_E( - FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += matchlengthHeaderSize; - } - - { - short litlengthNCount[MaxLL + 1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(litlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) - return ERROR(dictionary_corrupted); - /* Every literal length code must have non-zero probability */ - CHECK_F(ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); - CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, cctx->tmpCounters, sizeof(cctx->tmpCounters)), - dictionary_corrupted); - dictPtr += litlengthHeaderSize; - } - - if (dictPtr + 12 > dictEnd) - return ERROR(dictionary_corrupted); - cctx->rep[0] = ZSTD_readLE32(dictPtr + 0); - cctx->rep[1] = ZSTD_readLE32(dictPtr + 4); - cctx->rep[2] = ZSTD_readLE32(dictPtr + 8); - dictPtr += 12; - - { - size_t const dictContentSize = (size_t)(dictEnd - dictPtr); - U32 offcodeMax = MaxOff; - if (dictContentSize <= ((U32)-1) - 128 KB) { - U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ - offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ - } - /* All offset values <= dictContentSize + 128 KB must be representable */ - CHECK_F(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); - /* All repCodes must be <= dictContentSize and != 0*/ - { - U32 u; - for (u = 0; u < 3; u++) { - if (cctx->rep[u] == 0) - return ERROR(dictionary_corrupted); - if (cctx->rep[u] > dictContentSize) - return ERROR(dictionary_corrupted); - } - } - - cctx->flagStaticTables = 1; - cctx->flagStaticHufTable = HUF_repeat_valid; - return ZSTD_loadDictionaryContent(cctx, dictPtr, dictContentSize); - } -} - -/** ZSTD_compress_insertDictionary() : -* @return : 0, or an error code */ -static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx *cctx, const void *dict, size_t dictSize) -{ - if ((dict == NULL) || (dictSize <= 8)) - return 0; - - /* dict as pure content */ - if ((ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) || (cctx->forceRawDict)) - return ZSTD_loadDictionaryContent(cctx, dict, dictSize); - - /* dict as zstd dictionary */ - return ZSTD_loadZstdDictionary(cctx, dict, dictSize); -} - -/*! ZSTD_compressBegin_internal() : -* @return : 0, or an error code */ -static size_t ZSTD_compressBegin_internal(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, U64 pledgedSrcSize) -{ - ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue; - CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp)); - return ZSTD_compress_insertDictionary(cctx, dict, dictSize); -} - -/*! ZSTD_compressBegin_advanced() : -* @return : 0, or an error code */ -size_t ZSTD_compressBegin_advanced(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - /* compression parameters verification and optimization */ - CHECK_F(ZSTD_checkCParams(params.cParams)); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize); -} - -size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx *cctx, const void *dict, size_t dictSize, int compressionLevel) -{ - ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize); - return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0); -} - -size_t ZSTD_compressBegin(ZSTD_CCtx *cctx, int compressionLevel) { return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); } - -/*! ZSTD_writeEpilogue() : -* Ends a frame. -* @return : nb of bytes written into dst (or an error code) */ -static size_t ZSTD_writeEpilogue(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - size_t fhSize = 0; - - if (cctx->stage == ZSTDcs_created) - return ERROR(stage_wrong); /* init missing */ - - /* special case : empty frame */ - if (cctx->stage == ZSTDcs_init) { - fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0); - if (ZSTD_isError(fhSize)) - return fhSize; - dstCapacity -= fhSize; - op += fhSize; - cctx->stage = ZSTDcs_ongoing; - } - - if (cctx->stage != ZSTDcs_ending) { - /* write one last empty block, make it the "last" block */ - U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw) << 1) + 0; - if (dstCapacity < 4) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, cBlockHeader24); - op += ZSTD_blockHeaderSize; - dstCapacity -= ZSTD_blockHeaderSize; - } - - if (cctx->params.fParams.checksumFlag) { - U32 const checksum = (U32)xxh64_digest(&cctx->xxhState); - if (dstCapacity < 4) - return ERROR(dstSize_tooSmall); - ZSTD_writeLE32(op, checksum); - op += 4; - } - - cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ - return op - ostart; -} - -size_t ZSTD_compressEnd(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t endResult; - size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1); - if (ZSTD_isError(cSize)) - return cSize; - endResult = ZSTD_writeEpilogue(cctx, (char *)dst + cSize, dstCapacity - cSize); - if (ZSTD_isError(endResult)) - return endResult; - return cSize + endResult; -} - -static size_t ZSTD_compress_internal(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params) -{ - CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize)); - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -size_t ZSTD_compress_usingDict(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - ZSTD_parameters params) -{ - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); -} - -size_t ZSTD_compressCCtx(ZSTD_CCtx *ctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, ZSTD_parameters params) -{ - return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, NULL, 0, params); -} - -/* ===== Dictionary API ===== */ - -struct ZSTD_CDict_s { - void *dictBuffer; - const void *dictContent; - size_t dictContentSize; - ZSTD_CCtx *refContext; -}; /* typedef'd tp ZSTD_CDict within "zstd.h" */ - -size_t ZSTD_CDictWorkspaceBound(ZSTD_compressionParameters cParams) { return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CDict)); } - -static ZSTD_CDict *ZSTD_createCDict_advanced(const void *dictBuffer, size_t dictSize, unsigned byReference, ZSTD_parameters params, ZSTD_customMem customMem) -{ - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - { - ZSTD_CDict *const cdict = (ZSTD_CDict *)ZSTD_malloc(sizeof(ZSTD_CDict), customMem); - ZSTD_CCtx *const cctx = ZSTD_createCCtx_advanced(customMem); - - if (!cdict || !cctx) { - ZSTD_free(cdict, customMem); - ZSTD_freeCCtx(cctx); - return NULL; - } - - if ((byReference) || (!dictBuffer) || (!dictSize)) { - cdict->dictBuffer = NULL; - cdict->dictContent = dictBuffer; - } else { - void *const internalBuffer = ZSTD_malloc(dictSize, customMem); - if (!internalBuffer) { - ZSTD_free(cctx, customMem); - ZSTD_free(cdict, customMem); - return NULL; - } - memcpy(internalBuffer, dictBuffer, dictSize); - cdict->dictBuffer = internalBuffer; - cdict->dictContent = internalBuffer; - } - - { - size_t const errorCode = ZSTD_compressBegin_advanced(cctx, cdict->dictContent, dictSize, params, 0); - if (ZSTD_isError(errorCode)) { - ZSTD_free(cdict->dictBuffer, customMem); - ZSTD_free(cdict, customMem); - ZSTD_freeCCtx(cctx); - return NULL; - } - } - - cdict->refContext = cctx; - cdict->dictContentSize = dictSize; - return cdict; - } -} - -ZSTD_CDict *ZSTD_initCDict(const void *dict, size_t dictSize, ZSTD_parameters params, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - return ZSTD_createCDict_advanced(dict, dictSize, 1, params, stackMem); -} - -size_t ZSTD_freeCDict(ZSTD_CDict *cdict) -{ - if (cdict == NULL) - return 0; /* support free on NULL */ - { - ZSTD_customMem const cMem = cdict->refContext->customMem; - ZSTD_freeCCtx(cdict->refContext); - ZSTD_free(cdict->dictBuffer, cMem); - ZSTD_free(cdict, cMem); - return 0; - } -} - -static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict *cdict) { return ZSTD_getParamsFromCCtx(cdict->refContext); } - -size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx *cctx, const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize) -{ - if (cdict->dictContentSize) - CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize)) - else { - ZSTD_parameters params = cdict->refContext->params; - params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, params, pledgedSrcSize)); - } - return 0; -} - -/*! ZSTD_compress_usingCDict() : -* Compression using a digested Dictionary. -* Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. -* Note that compression level is decided during dictionary creation */ -size_t ZSTD_compress_usingCDict(ZSTD_CCtx *cctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_CDict *cdict) -{ - CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize)); - - if (cdict->refContext->params.fParams.contentSizeFlag == 1) { - cctx->params.fParams.contentSizeFlag = 1; - cctx->frameContentSize = srcSize; - } else { - cctx->params.fParams.contentSizeFlag = 0; - } - - return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); -} - -/* ****************************************************************** -* Streaming -********************************************************************/ - -typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage; - -struct ZSTD_CStream_s { - ZSTD_CCtx *cctx; - ZSTD_CDict *cdictLocal; - const ZSTD_CDict *cdict; - char *inBuff; - size_t inBuffSize; - size_t inToCompress; - size_t inBuffPos; - size_t inBuffTarget; - size_t blockSize; - char *outBuff; - size_t outBuffSize; - size_t outBuffContentSize; - size_t outBuffFlushedSize; - ZSTD_cStreamStage stage; - U32 checksum; - U32 frameEnded; - U64 pledgedSrcSize; - U64 inputProcessed; - ZSTD_parameters params; - ZSTD_customMem customMem; -}; /* typedef'd to ZSTD_CStream within "zstd.h" */ - -size_t ZSTD_CStreamWorkspaceBound(ZSTD_compressionParameters cParams) -{ - size_t const inBuffSize = (size_t)1 << cParams.windowLog; - size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, inBuffSize); - size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; - - return ZSTD_CCtxWorkspaceBound(cParams) + ZSTD_ALIGN(sizeof(ZSTD_CStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); -} - -ZSTD_CStream *ZSTD_createCStream_advanced(ZSTD_customMem customMem) -{ - ZSTD_CStream *zcs; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - zcs = (ZSTD_CStream *)ZSTD_malloc(sizeof(ZSTD_CStream), customMem); - if (zcs == NULL) - return NULL; - memset(zcs, 0, sizeof(ZSTD_CStream)); - memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem)); - zcs->cctx = ZSTD_createCCtx_advanced(customMem); - if (zcs->cctx == NULL) { - ZSTD_freeCStream(zcs); - return NULL; - } - return zcs; -} - -size_t ZSTD_freeCStream(ZSTD_CStream *zcs) -{ - if (zcs == NULL) - return 0; /* support free on NULL */ - { - ZSTD_customMem const cMem = zcs->customMem; - ZSTD_freeCCtx(zcs->cctx); - zcs->cctx = NULL; - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = NULL; - ZSTD_free(zcs->inBuff, cMem); - zcs->inBuff = NULL; - ZSTD_free(zcs->outBuff, cMem); - zcs->outBuff = NULL; - ZSTD_free(zcs, cMem); - return 0; - } -} - -/*====== Initialization ======*/ - -size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } -size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */; } - -static size_t ZSTD_resetCStream_internal(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize) -{ - if (zcs->inBuffSize == 0) - return ERROR(stage_wrong); /* zcs has not been init at least once => can't reset */ - - if (zcs->cdict) - CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize)) - else - CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize)); - - zcs->inToCompress = 0; - zcs->inBuffPos = 0; - zcs->inBuffTarget = zcs->blockSize; - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; - zcs->frameEnded = 0; - zcs->pledgedSrcSize = pledgedSrcSize; - zcs->inputProcessed = 0; - return 0; /* ready to go */ -} - -size_t ZSTD_resetCStream(ZSTD_CStream *zcs, unsigned long long pledgedSrcSize) -{ - - zcs->params.fParams.contentSizeFlag = (pledgedSrcSize > 0); - - return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); -} - -static size_t ZSTD_initCStream_advanced(ZSTD_CStream *zcs, const void *dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize) -{ - /* allocate buffers */ - { - size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog; - if (zcs->inBuffSize < neededInBuffSize) { - zcs->inBuffSize = neededInBuffSize; - ZSTD_free(zcs->inBuff, zcs->customMem); - zcs->inBuff = (char *)ZSTD_malloc(neededInBuffSize, zcs->customMem); - if (zcs->inBuff == NULL) - return ERROR(memory_allocation); - } - zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize); - } - if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize) + 1) { - zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize) + 1; - ZSTD_free(zcs->outBuff, zcs->customMem); - zcs->outBuff = (char *)ZSTD_malloc(zcs->outBuffSize, zcs->customMem); - if (zcs->outBuff == NULL) - return ERROR(memory_allocation); - } - - if (dict && dictSize >= 8) { - ZSTD_freeCDict(zcs->cdictLocal); - zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, 0, params, zcs->customMem); - if (zcs->cdictLocal == NULL) - return ERROR(memory_allocation); - zcs->cdict = zcs->cdictLocal; - } else - zcs->cdict = NULL; - - zcs->checksum = params.fParams.checksumFlag > 0; - zcs->params = params; - - return ZSTD_resetCStream_internal(zcs, pledgedSrcSize); -} - -ZSTD_CStream *ZSTD_initCStream(ZSTD_parameters params, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_CStream *const zcs = ZSTD_createCStream_advanced(stackMem); - if (zcs) { - size_t const code = ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); - if (ZSTD_isError(code)) { - return NULL; - } - } - return zcs; -} - -ZSTD_CStream *ZSTD_initCStream_usingCDict(const ZSTD_CDict *cdict, unsigned long long pledgedSrcSize, void *workspace, size_t workspaceSize) -{ - ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict); - ZSTD_CStream *const zcs = ZSTD_initCStream(params, pledgedSrcSize, workspace, workspaceSize); - if (zcs) { - zcs->cdict = cdict; - if (ZSTD_isError(ZSTD_resetCStream_internal(zcs, pledgedSrcSize))) { - return NULL; - } - } - return zcs; -} - -/*====== Compression ======*/ - -typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e; - -ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; -} - -static size_t ZSTD_compressStream_generic(ZSTD_CStream *zcs, void *dst, size_t *dstCapacityPtr, const void *src, size_t *srcSizePtr, ZSTD_flush_e const flush) -{ - U32 someMoreWork = 1; - const char *const istart = (const char *)src; - const char *const iend = istart + *srcSizePtr; - const char *ip = istart; - char *const ostart = (char *)dst; - char *const oend = ostart + *dstCapacityPtr; - char *op = ostart; - - while (someMoreWork) { - switch (zcs->stage) { - case zcss_init: - return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */ - - case zcss_load: - /* complete inBuffer */ - { - size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; - size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend - ip); - zcs->inBuffPos += loaded; - ip += loaded; - if ((zcs->inBuffPos == zcs->inToCompress) || (!flush && (toLoad != loaded))) { - someMoreWork = 0; - break; /* not enough input to get a full block : stop there, wait for more */ - } - } - /* compress curr block (note : this stage cannot be stopped in the middle) */ - { - void *cDst; - size_t cSize; - size_t const iSize = zcs->inBuffPos - zcs->inToCompress; - size_t oSize = oend - op; - if (oSize >= ZSTD_compressBound(iSize)) - cDst = op; /* compress directly into output buffer (avoid flush stage) */ - else - cDst = zcs->outBuff, oSize = zcs->outBuffSize; - cSize = (flush == zsf_end) ? ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) - : ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize); - if (ZSTD_isError(cSize)) - return cSize; - if (flush == zsf_end) - zcs->frameEnded = 1; - /* prepare next block */ - zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; - if (zcs->inBuffTarget > zcs->inBuffSize) - zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */ - zcs->inToCompress = zcs->inBuffPos; - if (cDst == op) { - op += cSize; - break; - } /* no need to flush */ - zcs->outBuffContentSize = cSize; - zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_flush; /* pass-through to flush stage */ - } - fallthrough; - - case zcss_flush: { - size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - op += flushed; - zcs->outBuffFlushedSize += flushed; - if (toFlush != flushed) { - someMoreWork = 0; - break; - } /* dst too small to store flushed data : stop there */ - zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; - zcs->stage = zcss_load; - break; - } - - case zcss_final: - someMoreWork = 0; /* do nothing */ - break; - - default: - return ERROR(GENERIC); /* impossible */ - } - } - - *srcSizePtr = ip - istart; - *dstCapacityPtr = op - ostart; - zcs->inputProcessed += *srcSizePtr; - if (zcs->frameEnded) - return 0; - { - size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; - if (hintInSize == 0) - hintInSize = zcs->blockSize; - return hintInSize; - } -} - -size_t ZSTD_compressStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output, ZSTD_inBuffer *input) -{ - size_t sizeRead = input->size - input->pos; - size_t sizeWritten = output->size - output->pos; - size_t const result = - ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, (const char *)(input->src) + input->pos, &sizeRead, zsf_gather); - input->pos += sizeRead; - output->pos += sizeWritten; - return result; -} - -/*====== Finalize ======*/ - -/*! ZSTD_flushStream() : -* @return : amount of data remaining to flush */ -size_t ZSTD_flushStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output) -{ - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const result = ZSTD_compressStream_generic(zcs, (char *)(output->dst) + output->pos, &sizeWritten, &srcSize, - &srcSize, /* use a valid src address instead of NULL */ - zsf_flush); - output->pos += sizeWritten; - if (ZSTD_isError(result)) - return result; - return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */ -} - -size_t ZSTD_endStream(ZSTD_CStream *zcs, ZSTD_outBuffer *output) -{ - BYTE *const ostart = (BYTE *)(output->dst) + output->pos; - BYTE *const oend = (BYTE *)(output->dst) + output->size; - BYTE *op = ostart; - - if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize)) - return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */ - - if (zcs->stage != zcss_final) { - /* flush whatever remains */ - size_t srcSize = 0; - size_t sizeWritten = output->size - output->pos; - size_t const notEnded = - ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */ - size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - op += sizeWritten; - if (remainingToFlush) { - output->pos += sizeWritten; - return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4); - } - /* create epilogue */ - zcs->stage = zcss_final; - zcs->outBuffContentSize = !notEnded ? 0 : ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, - 0); /* write epilogue, including final empty block, into outBuff */ - } - - /* flush epilogue */ - { - size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; - size_t const flushed = ZSTD_limitCopy(op, oend - op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush); - op += flushed; - zcs->outBuffFlushedSize += flushed; - output->pos += op - ostart; - if (toFlush == flushed) - zcs->stage = zcss_init; /* end reached */ - return toFlush - flushed; - } -} - -/*-===== Pre-defined compression levels =====-*/ - -#define ZSTD_DEFAULT_CLEVEL 1 -#define ZSTD_MAX_CLEVEL 22 -int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } - -static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL + 1] = { - { - /* "default" */ - /* W, C, H, S, L, TL, strat */ - {18, 12, 12, 1, 7, 16, ZSTD_fast}, /* level 0 - never used */ - {19, 13, 14, 1, 7, 16, ZSTD_fast}, /* level 1 */ - {19, 15, 16, 1, 6, 16, ZSTD_fast}, /* level 2 */ - {20, 16, 17, 1, 5, 16, ZSTD_dfast}, /* level 3.*/ - {20, 18, 18, 1, 5, 16, ZSTD_dfast}, /* level 4.*/ - {20, 15, 18, 3, 5, 16, ZSTD_greedy}, /* level 5 */ - {21, 16, 19, 2, 5, 16, ZSTD_lazy}, /* level 6 */ - {21, 17, 20, 3, 5, 16, ZSTD_lazy}, /* level 7 */ - {21, 18, 20, 3, 5, 16, ZSTD_lazy2}, /* level 8 */ - {21, 20, 20, 3, 5, 16, ZSTD_lazy2}, /* level 9 */ - {21, 19, 21, 4, 5, 16, ZSTD_lazy2}, /* level 10 */ - {22, 20, 22, 4, 5, 16, ZSTD_lazy2}, /* level 11 */ - {22, 20, 22, 5, 5, 16, ZSTD_lazy2}, /* level 12 */ - {22, 21, 22, 5, 5, 16, ZSTD_lazy2}, /* level 13 */ - {22, 21, 22, 6, 5, 16, ZSTD_lazy2}, /* level 14 */ - {22, 21, 21, 5, 5, 16, ZSTD_btlazy2}, /* level 15 */ - {23, 22, 22, 5, 5, 16, ZSTD_btlazy2}, /* level 16 */ - {23, 21, 22, 4, 5, 24, ZSTD_btopt}, /* level 17 */ - {23, 23, 22, 6, 5, 32, ZSTD_btopt}, /* level 18 */ - {23, 23, 22, 6, 3, 48, ZSTD_btopt}, /* level 19 */ - {25, 25, 23, 7, 3, 64, ZSTD_btopt2}, /* level 20 */ - {26, 26, 23, 7, 3, 256, ZSTD_btopt2}, /* level 21 */ - {27, 27, 25, 9, 3, 512, ZSTD_btopt2}, /* level 22 */ - }, - { - /* for srcSize <= 256 KB */ - /* W, C, H, S, L, T, strat */ - {0, 0, 0, 0, 0, 0, ZSTD_fast}, /* level 0 - not used */ - {18, 13, 14, 1, 6, 8, ZSTD_fast}, /* level 1 */ - {18, 14, 13, 1, 5, 8, ZSTD_dfast}, /* level 2 */ - {18, 16, 15, 1, 5, 8, ZSTD_dfast}, /* level 3 */ - {18, 15, 17, 1, 5, 8, ZSTD_greedy}, /* level 4.*/ - {18, 16, 17, 4, 5, 8, ZSTD_greedy}, /* level 5.*/ - {18, 16, 17, 3, 5, 8, ZSTD_lazy}, /* level 6.*/ - {18, 17, 17, 4, 4, 8, ZSTD_lazy}, /* level 7 */ - {18, 17, 17, 4, 4, 8, ZSTD_lazy2}, /* level 8 */ - {18, 17, 17, 5, 4, 8, ZSTD_lazy2}, /* level 9 */ - {18, 17, 17, 6, 4, 8, ZSTD_lazy2}, /* level 10 */ - {18, 18, 17, 6, 4, 8, ZSTD_lazy2}, /* level 11.*/ - {18, 18, 17, 7, 4, 8, ZSTD_lazy2}, /* level 12.*/ - {18, 19, 17, 6, 4, 8, ZSTD_btlazy2}, /* level 13 */ - {18, 18, 18, 4, 4, 16, ZSTD_btopt}, /* level 14.*/ - {18, 18, 18, 4, 3, 16, ZSTD_btopt}, /* level 15.*/ - {18, 19, 18, 6, 3, 32, ZSTD_btopt}, /* level 16.*/ - {18, 19, 18, 8, 3, 64, ZSTD_btopt}, /* level 17.*/ - {18, 19, 18, 9, 3, 128, ZSTD_btopt}, /* level 18.*/ - {18, 19, 18, 10, 3, 256, ZSTD_btopt}, /* level 19.*/ - {18, 19, 18, 11, 3, 512, ZSTD_btopt2}, /* level 20.*/ - {18, 19, 18, 12, 3, 512, ZSTD_btopt2}, /* level 21.*/ - {18, 19, 18, 13, 3, 512, ZSTD_btopt2}, /* level 22.*/ - }, - { - /* for srcSize <= 128 KB */ - /* W, C, H, S, L, T, strat */ - {17, 12, 12, 1, 7, 8, ZSTD_fast}, /* level 0 - not used */ - {17, 12, 13, 1, 6, 8, ZSTD_fast}, /* level 1 */ - {17, 13, 16, 1, 5, 8, ZSTD_fast}, /* level 2 */ - {17, 16, 16, 2, 5, 8, ZSTD_dfast}, /* level 3 */ - {17, 13, 15, 3, 4, 8, ZSTD_greedy}, /* level 4 */ - {17, 15, 17, 4, 4, 8, ZSTD_greedy}, /* level 5 */ - {17, 16, 17, 3, 4, 8, ZSTD_lazy}, /* level 6 */ - {17, 15, 17, 4, 4, 8, ZSTD_lazy2}, /* level 7 */ - {17, 17, 17, 4, 4, 8, ZSTD_lazy2}, /* level 8 */ - {17, 17, 17, 5, 4, 8, ZSTD_lazy2}, /* level 9 */ - {17, 17, 17, 6, 4, 8, ZSTD_lazy2}, /* level 10 */ - {17, 17, 17, 7, 4, 8, ZSTD_lazy2}, /* level 11 */ - {17, 17, 17, 8, 4, 8, ZSTD_lazy2}, /* level 12 */ - {17, 18, 17, 6, 4, 8, ZSTD_btlazy2}, /* level 13.*/ - {17, 17, 17, 7, 3, 8, ZSTD_btopt}, /* level 14.*/ - {17, 17, 17, 7, 3, 16, ZSTD_btopt}, /* level 15.*/ - {17, 18, 17, 7, 3, 32, ZSTD_btopt}, /* level 16.*/ - {17, 18, 17, 7, 3, 64, ZSTD_btopt}, /* level 17.*/ - {17, 18, 17, 7, 3, 256, ZSTD_btopt}, /* level 18.*/ - {17, 18, 17, 8, 3, 256, ZSTD_btopt}, /* level 19.*/ - {17, 18, 17, 9, 3, 256, ZSTD_btopt2}, /* level 20.*/ - {17, 18, 17, 10, 3, 256, ZSTD_btopt2}, /* level 21.*/ - {17, 18, 17, 11, 3, 512, ZSTD_btopt2}, /* level 22.*/ - }, - { - /* for srcSize <= 16 KB */ - /* W, C, H, S, L, T, strat */ - {14, 12, 12, 1, 7, 6, ZSTD_fast}, /* level 0 - not used */ - {14, 14, 14, 1, 6, 6, ZSTD_fast}, /* level 1 */ - {14, 14, 14, 1, 4, 6, ZSTD_fast}, /* level 2 */ - {14, 14, 14, 1, 4, 6, ZSTD_dfast}, /* level 3.*/ - {14, 14, 14, 4, 4, 6, ZSTD_greedy}, /* level 4.*/ - {14, 14, 14, 3, 4, 6, ZSTD_lazy}, /* level 5.*/ - {14, 14, 14, 4, 4, 6, ZSTD_lazy2}, /* level 6 */ - {14, 14, 14, 5, 4, 6, ZSTD_lazy2}, /* level 7 */ - {14, 14, 14, 6, 4, 6, ZSTD_lazy2}, /* level 8.*/ - {14, 15, 14, 6, 4, 6, ZSTD_btlazy2}, /* level 9.*/ - {14, 15, 14, 3, 3, 6, ZSTD_btopt}, /* level 10.*/ - {14, 15, 14, 6, 3, 8, ZSTD_btopt}, /* level 11.*/ - {14, 15, 14, 6, 3, 16, ZSTD_btopt}, /* level 12.*/ - {14, 15, 14, 6, 3, 24, ZSTD_btopt}, /* level 13.*/ - {14, 15, 15, 6, 3, 48, ZSTD_btopt}, /* level 14.*/ - {14, 15, 15, 6, 3, 64, ZSTD_btopt}, /* level 15.*/ - {14, 15, 15, 6, 3, 96, ZSTD_btopt}, /* level 16.*/ - {14, 15, 15, 6, 3, 128, ZSTD_btopt}, /* level 17.*/ - {14, 15, 15, 6, 3, 256, ZSTD_btopt}, /* level 18.*/ - {14, 15, 15, 7, 3, 256, ZSTD_btopt}, /* level 19.*/ - {14, 15, 15, 8, 3, 256, ZSTD_btopt2}, /* level 20.*/ - {14, 15, 15, 9, 3, 256, ZSTD_btopt2}, /* level 21.*/ - {14, 15, 15, 10, 3, 256, ZSTD_btopt2}, /* level 22.*/ - }, -}; - -/*! ZSTD_getCParams() : -* @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`. -* Size values are optional, provide 0 if not known or unused */ -ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) -{ - ZSTD_compressionParameters cp; - size_t const addedSize = srcSize ? 0 : 500; - U64 const rSize = srcSize + dictSize ? srcSize + dictSize + addedSize : (U64)-1; - U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */ - if (compressionLevel <= 0) - compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */ - if (compressionLevel > ZSTD_MAX_CLEVEL) - compressionLevel = ZSTD_MAX_CLEVEL; - cp = ZSTD_defaultCParameters[tableID][compressionLevel]; - if (ZSTD_32bits()) { /* auto-correction, for 32-bits mode */ - if (cp.windowLog > ZSTD_WINDOWLOG_MAX) - cp.windowLog = ZSTD_WINDOWLOG_MAX; - if (cp.chainLog > ZSTD_CHAINLOG_MAX) - cp.chainLog = ZSTD_CHAINLOG_MAX; - if (cp.hashLog > ZSTD_HASHLOG_MAX) - cp.hashLog = ZSTD_HASHLOG_MAX; - } - cp = ZSTD_adjustCParams(cp, srcSize, dictSize); - return cp; -} - -/*! ZSTD_getParams() : -* same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). -* All fields of `ZSTD_frameParameters` are set to default (0) */ -ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) -{ - ZSTD_parameters params; - ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize); - memset(¶ms, 0, sizeof(params)); - params.cParams = cParams; - return params; -} - -EXPORT_SYMBOL(ZSTD_maxCLevel); -EXPORT_SYMBOL(ZSTD_compressBound); - -EXPORT_SYMBOL(ZSTD_CCtxWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initCCtx); -EXPORT_SYMBOL(ZSTD_compressCCtx); -EXPORT_SYMBOL(ZSTD_compress_usingDict); - -EXPORT_SYMBOL(ZSTD_CDictWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initCDict); -EXPORT_SYMBOL(ZSTD_compress_usingCDict); - -EXPORT_SYMBOL(ZSTD_CStreamWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initCStream); -EXPORT_SYMBOL(ZSTD_initCStream_usingCDict); -EXPORT_SYMBOL(ZSTD_resetCStream); -EXPORT_SYMBOL(ZSTD_compressStream); -EXPORT_SYMBOL(ZSTD_flushStream); -EXPORT_SYMBOL(ZSTD_endStream); -EXPORT_SYMBOL(ZSTD_CStreamInSize); -EXPORT_SYMBOL(ZSTD_CStreamOutSize); - -EXPORT_SYMBOL(ZSTD_getCParams); -EXPORT_SYMBOL(ZSTD_getParams); -EXPORT_SYMBOL(ZSTD_checkCParams); -EXPORT_SYMBOL(ZSTD_adjustCParams); - -EXPORT_SYMBOL(ZSTD_compressBegin); -EXPORT_SYMBOL(ZSTD_compressBegin_usingDict); -EXPORT_SYMBOL(ZSTD_compressBegin_advanced); -EXPORT_SYMBOL(ZSTD_copyCCtx); -EXPORT_SYMBOL(ZSTD_compressBegin_usingCDict); -EXPORT_SYMBOL(ZSTD_compressContinue); -EXPORT_SYMBOL(ZSTD_compressEnd); - -EXPORT_SYMBOL(ZSTD_getBlockSizeMax); -EXPORT_SYMBOL(ZSTD_compressBlock); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c new file mode 100644 index 000000000000..436985b620e5 --- /dev/null +++ b/lib/zstd/compress/fse_compress.c @@ -0,0 +1,625 @@ +/* ****************************************************************** + * FSE : Finite State Entropy encoder + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Includes +****************************************************************/ +#include "../common/compiler.h" +#include "../common/mem.h" /* U32, U16, etc. */ +#include "../common/debug.h" /* assert, DEBUGLOG */ +#include "hist.h" /* HIST_count_wksp */ +#include "../common/bitstream.h" +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#include "../common/error_private.h" +#define ZSTD_DEPS_NEED_MALLOC +#define ZSTD_DEPS_NEED_MATH64 +#include "../common/zstd_deps.h" /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */ + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define FSE_isError ERR_isError + + +/* ************************************************************** +* Templates +****************************************************************/ +/* + designed to be included + for type-specific functions (template emulation in C) + Objective is to write these functions only once, for improved maintenance +*/ + +/* safety checks */ +#ifndef FSE_FUNCTION_EXTENSION +# error "FSE_FUNCTION_EXTENSION must be defined" +#endif +#ifndef FSE_FUNCTION_TYPE +# error "FSE_FUNCTION_TYPE must be defined" +#endif + +/* Function names */ +#define FSE_CAT(X,Y) X##Y +#define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y) +#define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y) + + +/* Function templates */ + +/* FSE_buildCTable_wksp() : + * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). + * wkspSize should be sized to handle worst case situation, which is `1<>1 : 1) ; + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + U32 const step = FSE_TABLESTEP(tableSize); + + U32* cumul = (U32*)workSpace; + FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2)); + + U32 highThreshold = tableSize-1; + + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */ + if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge); + /* CTable header */ + tableU16[-2] = (U16) tableLog; + tableU16[-1] = (U16) maxSymbolValue; + assert(tableLog < 16); /* required for threshold strategy to work */ + + /* For explanations on how to distribute symbol values over the table : + * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ + + #ifdef __clang_analyzer__ + ZSTD_memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */ + #endif + + /* symbol start positions */ + { U32 u; + cumul[0] = 0; + for (u=1; u <= maxSymbolValue+1; u++) { + if (normalizedCounter[u-1]==-1) { /* Low proba symbol */ + cumul[u] = cumul[u-1] + 1; + tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1); + } else { + cumul[u] = cumul[u-1] + normalizedCounter[u-1]; + } } + cumul[maxSymbolValue+1] = tableSize+1; + } + + /* Spread symbols */ + { U32 position = 0; + U32 symbol; + for (symbol=0; symbol<=maxSymbolValue; symbol++) { + int nbOccurrences; + int const freq = normalizedCounter[symbol]; + for (nbOccurrences=0; nbOccurrences highThreshold) + position = (position + step) & tableMask; /* Low proba area */ + } } + + assert(position==0); /* Must have initialized all positions */ + } + + /* Build table */ + { U32 u; for (u=0; u> 3) + 3; + return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ +} + +static size_t +FSE_writeNCount_generic (void* header, size_t headerBufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, + unsigned writeIsSafe) +{ + BYTE* const ostart = (BYTE*) header; + BYTE* out = ostart; + BYTE* const oend = ostart + headerBufferSize; + int nbBits; + const int tableSize = 1 << tableLog; + int remaining; + int threshold; + U32 bitStream = 0; + int bitCount = 0; + unsigned symbol = 0; + unsigned const alphabetSize = maxSymbolValue + 1; + int previousIs0 = 0; + + /* Table Size */ + bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount; + bitCount += 4; + + /* Init */ + remaining = tableSize+1; /* +1 for extra accuracy */ + threshold = tableSize; + nbBits = tableLog+1; + + while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */ + if (previousIs0) { + unsigned start = symbol; + while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++; + if (symbol == alphabetSize) break; /* incorrect distribution */ + while (symbol >= start+24) { + start+=24; + bitStream += 0xFFFFU << bitCount; + if ((!writeIsSafe) && (out > oend-2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE) bitStream; + out[1] = (BYTE)(bitStream>>8); + out+=2; + bitStream>>=16; + } + while (symbol >= start+3) { + start+=3; + bitStream += 3 << bitCount; + bitCount += 2; + } + bitStream += (symbol-start) << bitCount; + bitCount += 2; + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + { int count = normalizedCounter[symbol++]; + int const max = (2*threshold-1) - remaining; + remaining -= count < 0 ? -count : count; + count++; /* +1 for extra accuracy */ + if (count>=threshold) + count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ + bitStream += count << bitCount; + bitCount += nbBits; + bitCount -= (count>=1; } + } + if (bitCount>16) { + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out += 2; + bitStream >>= 16; + bitCount -= 16; + } } + + if (remaining != 1) + return ERROR(GENERIC); /* incorrect normalized distribution */ + assert(symbol <= alphabetSize); + + /* flush remaining bitStream */ + if ((!writeIsSafe) && (out > oend - 2)) + return ERROR(dstSize_tooSmall); /* Buffer overflow */ + out[0] = (BYTE)bitStream; + out[1] = (BYTE)(bitStream>>8); + out+= (bitCount+7) /8; + + return (out-ostart); +} + + +size_t FSE_writeNCount (void* buffer, size_t bufferSize, + const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) +{ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */ + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */ + + if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); + + return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */); +} + + +/*-************************************************************** +* FSE Compression Code +****************************************************************/ + +FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) +{ + size_t size; + if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX; + size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); + return (FSE_CTable*)ZSTD_malloc(size); +} + +void FSE_freeCTable (FSE_CTable* ct) { ZSTD_free(ct); } + +/* provides the minimum logSize to safely represent a distribution */ +static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) +{ + U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1; + U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; + U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + return minBits; +} + +unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) +{ + U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; + U32 tableLog = maxTableLog; + U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); + assert(srcSize > 1); /* Not supported, RLE should be used instead */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */ + if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */ + if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG; + if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG; + return tableLog; +} + +unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); +} + +/* Secondary normalization method. + To be used when primary method fails. */ + +static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue, short lowProbCount) +{ + short const NOT_YET_ASSIGNED = -2; + U32 s; + U32 distributed = 0; + U32 ToDistribute; + + /* Init */ + U32 const lowThreshold = (U32)(total >> tableLog); + U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == 0) { + norm[s]=0; + continue; + } + if (count[s] <= lowThreshold) { + norm[s] = lowProbCount; + distributed++; + total -= count[s]; + continue; + } + if (count[s] <= lowOne) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } + + norm[s]=NOT_YET_ASSIGNED; + } + ToDistribute = (1 << tableLog) - distributed; + + if (ToDistribute == 0) + return 0; + + if ((total / ToDistribute) > lowOne) { + /* risk of rounding to zero */ + lowOne = (U32)((total * 3) / (ToDistribute * 2)); + for (s=0; s<=maxSymbolValue; s++) { + if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { + norm[s] = 1; + distributed++; + total -= count[s]; + continue; + } } + ToDistribute = (1 << tableLog) - distributed; + } + + if (distributed == maxSymbolValue+1) { + /* all values are pretty poor; + probably incompressible data (should have already been detected); + find max, then give all remaining points to max */ + U32 maxV = 0, maxC = 0; + for (s=0; s<=maxSymbolValue; s++) + if (count[s] > maxC) { maxV=s; maxC=count[s]; } + norm[maxV] += (short)ToDistribute; + return 0; + } + + if (total == 0) { + /* all of the symbols were low enough for the lowOne or lowThreshold */ + for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1)) + if (norm[s] > 0) { ToDistribute--; norm[s]++; } + return 0; + } + + { U64 const vStepLog = 62 - tableLog; + U64 const mid = (1ULL << (vStepLog-1)) - 1; + U64 const rStep = ZSTD_div64((((U64)1<> vStepLog); + U32 const sEnd = (U32)(end >> vStepLog); + U32 const weight = sEnd - sStart; + if (weight < 1) + return ERROR(GENERIC); + norm[s] = (short)weight; + tmpTotal = end; + } } } + + return 0; +} + +size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog, + const unsigned* count, size_t total, + unsigned maxSymbolValue, unsigned useLowProbCount) +{ + /* Sanity checks */ + if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG; + if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */ + if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */ + if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ + + { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; + short const lowProbCount = useLowProbCount ? -1 : 1; + U64 const scale = 62 - tableLog; + U64 const step = ZSTD_div64((U64)1<<62, (U32)total); /* <== here, one division ! */ + U64 const vStep = 1ULL<<(scale-20); + int stillToDistribute = 1<> tableLog); + + for (s=0; s<=maxSymbolValue; s++) { + if (count[s] == total) return 0; /* rle special case */ + if (count[s] == 0) { normalizedCounter[s]=0; continue; } + if (count[s] <= lowThreshold) { + normalizedCounter[s] = lowProbCount; + stillToDistribute--; + } else { + short proba = (short)((count[s]*step) >> scale); + if (proba<8) { + U64 restToBeat = vStep * rtbTable[proba]; + proba += (count[s]*step) - ((U64)proba< restToBeat; + } + if (proba > largestP) { largestP=proba; largest=s; } + normalizedCounter[s] = proba; + stillToDistribute -= proba; + } } + if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { + /* corner case, need another normalization method */ + size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue, lowProbCount); + if (FSE_isError(errorCode)) return errorCode; + } + else normalizedCounter[largest] += (short)stillToDistribute; + } + +#if 0 + { /* Print Table (debug) */ + U32 s; + U32 nTotal = 0; + for (s=0; s<=maxSymbolValue; s++) + RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]); + for (s=0; s<=maxSymbolValue; s++) + nTotal += abs(normalizedCounter[s]); + if (nTotal != (1U<>1); /* assumption : tableLog >= 1 */ + FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT); + unsigned s; + + /* Sanity checks */ + if (nbBits < 1) return ERROR(GENERIC); /* min size */ + + /* header */ + tableU16[-2] = (U16) nbBits; + tableU16[-1] = (U16) maxSymbolValue; + + /* Build table */ + for (s=0; s FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + FSE_FLUSHBITS(&bitC); + } + + /* 2 or 4 encoding per loop */ + while ( ip>istart ) { + + FSE_encodeSymbol(&bitC, &CState2, *--ip); + + if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */ + FSE_FLUSHBITS(&bitC); + + FSE_encodeSymbol(&bitC, &CState1, *--ip); + + if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */ + FSE_encodeSymbol(&bitC, &CState2, *--ip); + FSE_encodeSymbol(&bitC, &CState1, *--ip); + } + + FSE_FLUSHBITS(&bitC); + } + + FSE_flushCState(&bitC, &CState2); + FSE_flushCState(&bitC, &CState1); + return BIT_closeCStream(&bitC); +} + +size_t FSE_compress_usingCTable (void* dst, size_t dstSize, + const void* src, size_t srcSize, + const FSE_CTable* ct) +{ + unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); + + if (fast) + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); + else + return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); +} + + +size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } + + +#endif /* FSE_COMMONDEFS_ONLY */ diff --git a/lib/zstd/compress/hist.c b/lib/zstd/compress/hist.c new file mode 100644 index 000000000000..3ddc6dfb6894 --- /dev/null +++ b/lib/zstd/compress/hist.c @@ -0,0 +1,165 @@ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +#include "../common/mem.h" /* U32, BYTE, etc. */ +#include "../common/debug.h" /* assert, DEBUGLOG */ +#include "../common/error_private.h" /* ERROR */ +#include "hist.h" + + +/* --- Error management --- */ +unsigned HIST_isError(size_t code) { return ERR_isError(code); } + +/*-************************************************************** + * Histogram functions + ****************************************************************/ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize) +{ + const BYTE* ip = (const BYTE*)src; + const BYTE* const end = ip + srcSize; + unsigned maxSymbolValue = *maxSymbolValuePtr; + unsigned largestCount=0; + + ZSTD_memset(count, 0, (maxSymbolValue+1) * sizeof(*count)); + if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } + + while (ip largestCount) largestCount = count[s]; + } + + return largestCount; +} + +typedef enum { trustInput, checkMaxSymbolValue } HIST_checkInput_e; + +/* HIST_count_parallel_wksp() : + * store histogram into 4 intermediate tables, recombined at the end. + * this design makes better use of OoO cpus, + * and is noticeably faster when some values are heavily repeated. + * But it needs some additional workspace for intermediate tables. + * `workSpace` must be a U32 table of size >= HIST_WKSP_SIZE_U32. + * @return : largest histogram frequency, + * or an error code (notably when histogram's alphabet is larger than *maxSymbolValuePtr) */ +static size_t HIST_count_parallel_wksp( + unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + HIST_checkInput_e check, + U32* const workSpace) +{ + const BYTE* ip = (const BYTE*)source; + const BYTE* const iend = ip+sourceSize; + size_t const countSize = (*maxSymbolValuePtr + 1) * sizeof(*count); + unsigned max=0; + U32* const Counting1 = workSpace; + U32* const Counting2 = Counting1 + 256; + U32* const Counting3 = Counting2 + 256; + U32* const Counting4 = Counting3 + 256; + + /* safety checks */ + assert(*maxSymbolValuePtr <= 255); + if (!sourceSize) { + ZSTD_memset(count, 0, countSize); + *maxSymbolValuePtr = 0; + return 0; + } + ZSTD_memset(workSpace, 0, 4*256*sizeof(unsigned)); + + /* by stripes of 16 bytes */ + { U32 cached = MEM_read32(ip); ip += 4; + while (ip < iend-15) { + U32 c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + c = cached; cached = MEM_read32(ip); ip += 4; + Counting1[(BYTE) c ]++; + Counting2[(BYTE)(c>>8) ]++; + Counting3[(BYTE)(c>>16)]++; + Counting4[ c>>24 ]++; + } + ip-=4; + } + + /* finish last symbols */ + while (ip max) max = Counting1[s]; + } } + + { unsigned maxSymbolValue = 255; + while (!Counting1[maxSymbolValue]) maxSymbolValue--; + if (check && maxSymbolValue > *maxSymbolValuePtr) return ERROR(maxSymbolValue_tooSmall); + *maxSymbolValuePtr = maxSymbolValue; + ZSTD_memmove(count, Counting1, countSize); /* in case count & Counting1 are overlapping */ + } + return (size_t)max; +} + +/* HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if (sourceSize < 1500) /* heuristic threshold */ + return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize); + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, trustInput, (U32*)workSpace); +} + +/* HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* source, size_t sourceSize, + void* workSpace, size_t workSpaceSize) +{ + if ((size_t)workSpace & 3) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (workSpaceSize < HIST_WKSP_SIZE) return ERROR(workSpace_tooSmall); + if (*maxSymbolValuePtr < 255) + return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, checkMaxSymbolValue, (U32*)workSpace); + *maxSymbolValuePtr = 255; + return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace, workSpaceSize); +} + diff --git a/lib/zstd/compress/hist.h b/lib/zstd/compress/hist.h new file mode 100644 index 000000000000..228ed48a71de --- /dev/null +++ b/lib/zstd/compress/hist.h @@ -0,0 +1,75 @@ +/* ****************************************************************** + * hist : Histogram functions + * part of Finite State Entropy project + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* --- dependencies --- */ +#include "../common/zstd_deps.h" /* size_t */ + + +/* --- simple histogram functions --- */ + +/*! HIST_count(): + * Provides the precise count of each byte within a table 'count'. + * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1). + * Updates *maxSymbolValuePtr with actual largest symbol value detected. + * @return : count of the most frequent symbol (which isn't identified). + * or an error code, which can be tested using HIST_isError(). + * note : if return == srcSize, there is only one symbol. + */ +size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */ + + +/* --- advanced histogram functions --- */ + +#define HIST_WKSP_SIZE_U32 1024 +#define HIST_WKSP_SIZE (HIST_WKSP_SIZE_U32 * sizeof(unsigned)) +/** HIST_count_wksp() : + * Same as HIST_count(), but using an externally provided scratch buffer. + * Benefit is this function will use very little stack space. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/** HIST_countFast() : + * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr. + * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` + */ +size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); + +/** HIST_countFast_wksp() : + * Same as HIST_countFast(), but using an externally provided scratch buffer. + * `workSpace` is a writable buffer which must be 4-bytes aligned, + * `workSpaceSize` must be >= HIST_WKSP_SIZE + */ +size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize, + void* workSpace, size_t workSpaceSize); + +/*! HIST_count_simple() : + * Same as HIST_countFast(), this function is unsafe, + * and will segfault if any value within `src` is `> *maxSymbolValuePtr`. + * It is also a bit slower for large inputs. + * However, it does not need any additional memory (not even on stack). + * @return : count of the most frequent symbol. + * Note this function doesn't produce any error (i.e. it must succeed). + */ +unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, + const void* src, size_t srcSize); diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c new file mode 100644 index 000000000000..4acae45a4a6e --- /dev/null +++ b/lib/zstd/compress/huf_compress.c @@ -0,0 +1,902 @@ +/* ****************************************************************** + * Huffman encoder, part of New Generation Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * - Public forum : https://groups.google.com/forum/#!forum/lz4c + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Compiler specifics +****************************************************************/ + + +/* ************************************************************** +* Includes +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" +#include "hist.h" +#define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */ +#include "../common/fse.h" /* header compression */ +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/error_private.h" + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError +#define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */ + + +/* ************************************************************** +* Utils +****************************************************************/ +unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) +{ + return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); +} + + +/* ******************************************************* +* HUF : Huffman block compression +*********************************************************/ +/* HUF_compressWeights() : + * Same as FSE_compress(), but dedicated to huff0's weights compression. + * The use case needs much less stack memory. + * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. + */ +#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 + +typedef struct { + FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; + U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)]; + unsigned count[HUF_TABLELOG_MAX+1]; + S16 norm[HUF_TABLELOG_MAX+1]; +} HUF_CompressWeightsWksp; + +static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightTable, size_t wtSize, void* workspace, size_t workspaceSize) +{ + BYTE* const ostart = (BYTE*) dst; + BYTE* op = ostart; + BYTE* const oend = ostart + dstSize; + + unsigned maxSymbolValue = HUF_TABLELOG_MAX; + U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; + HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace; + + if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC); + + /* init conditions */ + if (wtSize <= 1) return 0; /* Not compressible */ + + /* Scan input and build symbol stats */ + { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */ + if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ + if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ + } + + tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); + CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) ); + + /* Write table description header */ + { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) ); + op += hSize; + } + + /* Compress */ + CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) ); + { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) ); + if (cSize == 0) return 0; /* not enough space for compressed data */ + op += cSize; + } + + return (size_t)(op-ostart); +} + + +typedef struct { + HUF_CompressWeightsWksp wksp; + BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; +} HUF_WriteCTableWksp; + +size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, + void* workspace, size_t workspaceSize) +{ + BYTE* op = (BYTE*)dst; + U32 n; + HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace; + + /* check conditions */ + if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + + /* convert to weight */ + wksp->bitsToWeight[0] = 0; + for (n=1; nbitsToWeight[n] = (BYTE)(huffLog + 1 - n); + for (n=0; nhuffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits]; + + /* attempt weights compression by FSE */ + { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) ); + if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ + op[0] = (BYTE)hSize; + return hSize+1; + } } + + /* write raw values as 4-bits (max : 15) */ + if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ + if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ + op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); + wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ + for (n=0; nhuffWeight[n] << 4) + wksp->huffWeight[n+1]); + return ((maxSymbolValue+1)/2) + 1; +} + +/*! HUF_writeCTable() : + `CTable` : Huffman tree to save, using huf representation. + @return : size of saved CTable */ +size_t HUF_writeCTable (void* dst, size_t maxDstSize, + const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog) +{ + HUF_WriteCTableWksp wksp; + return HUF_writeCTable_wksp(dst, maxDstSize, CTable, maxSymbolValue, huffLog, &wksp, sizeof(wksp)); +} + + +size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* hasZeroWeights) +{ + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ + U32 tableLog = 0; + U32 nbSymbols = 0; + + /* get symbol weights */ + CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); + *hasZeroWeights = (rankVal[0] > 0); + + /* check result */ + if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall); + + /* Prepare base value per rank */ + { U32 n, nextRankStart = 0; + for (n=1; n<=tableLog; n++) { + U32 curr = nextRankStart; + nextRankStart += (rankVal[n] << (n-1)); + rankVal[n] = curr; + } } + + /* fill nbBits */ + { U32 n; for (n=0; nn=tableLog+1 */ + U16 valPerRank[HUF_TABLELOG_MAX+2] = {0}; + { U32 n; for (n=0; n0; n--) { /* start at n=tablelog <-> w=1 */ + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + /* assign value within rank, symbol order */ + { U32 n; for (n=0; n maxNbBits to be maxNbBits. Then it adjusts + * the tree to so that it is a valid canonical Huffman tree. + * + * @pre The sum of the ranks of each symbol == 2^largestBits, + * where largestBits == huffNode[lastNonNull].nbBits. + * @post The sum of the ranks of each symbol == 2^largestBits, + * where largestBits is the return value <= maxNbBits. + * + * @param huffNode The Huffman tree modified in place to enforce maxNbBits. + * @param lastNonNull The symbol with the lowest count in the Huffman tree. + * @param maxNbBits The maximum allowed number of bits, which the Huffman tree + * may not respect. After this function the Huffman tree will + * respect maxNbBits. + * @return The maximum number of bits of the Huffman tree after adjustment, + * necessarily no more than maxNbBits. + */ +static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) +{ + const U32 largestBits = huffNode[lastNonNull].nbBits; + /* early exit : no elt > maxNbBits, so the tree is already valid. */ + if (largestBits <= maxNbBits) return largestBits; + + /* there are several too large elements (at least >= 2) */ + { int totalCost = 0; + const U32 baseCost = 1 << (largestBits - maxNbBits); + int n = (int)lastNonNull; + + /* Adjust any ranks > maxNbBits to maxNbBits. + * Compute totalCost, which is how far the sum of the ranks is + * we are over 2^largestBits after adjust the offending ranks. + */ + while (huffNode[n].nbBits > maxNbBits) { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); + huffNode[n].nbBits = (BYTE)maxNbBits; + n--; + } + /* n stops at huffNode[n].nbBits <= maxNbBits */ + assert(huffNode[n].nbBits <= maxNbBits); + /* n end at index of smallest symbol using < maxNbBits */ + while (huffNode[n].nbBits == maxNbBits) --n; + + /* renorm totalCost from 2^largestBits to 2^maxNbBits + * note : totalCost is necessarily a multiple of baseCost */ + assert((totalCost & (baseCost - 1)) == 0); + totalCost >>= (largestBits - maxNbBits); + assert(totalCost > 0); + + /* repay normalized cost */ + { U32 const noSymbol = 0xF0F0F0F0; + U32 rankLast[HUF_TABLELOG_MAX+2]; + + /* Get pos of last (smallest = lowest cum. count) symbol per rank */ + ZSTD_memset(rankLast, 0xF0, sizeof(rankLast)); + { U32 currentNbBits = maxNbBits; + int pos; + for (pos=n ; pos >= 0; pos--) { + if (huffNode[pos].nbBits >= currentNbBits) continue; + currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */ + rankLast[maxNbBits-currentNbBits] = (U32)pos; + } } + + while (totalCost > 0) { + /* Try to reduce the next power of 2 above totalCost because we + * gain back half the rank. + */ + U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1; + for ( ; nBitsToDecrease > 1; nBitsToDecrease--) { + U32 const highPos = rankLast[nBitsToDecrease]; + U32 const lowPos = rankLast[nBitsToDecrease-1]; + if (highPos == noSymbol) continue; + /* Decrease highPos if no symbols of lowPos or if it is + * not cheaper to remove 2 lowPos than highPos. + */ + if (lowPos == noSymbol) break; + { U32 const highTotal = huffNode[highPos].count; + U32 const lowTotal = 2 * huffNode[lowPos].count; + if (highTotal <= lowTotal) break; + } } + /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ + assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1); + /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ + while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) + nBitsToDecrease++; + assert(rankLast[nBitsToDecrease] != noSymbol); + /* Increase the number of bits to gain back half the rank cost. */ + totalCost -= 1 << (nBitsToDecrease-1); + huffNode[rankLast[nBitsToDecrease]].nbBits++; + + /* Fix up the new rank. + * If the new rank was empty, this symbol is now its smallest. + * Otherwise, this symbol will be the largest in the new rank so no adjustment. + */ + if (rankLast[nBitsToDecrease-1] == noSymbol) + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; + /* Fix up the old rank. + * If the symbol was at position 0, meaning it was the highest weight symbol in the tree, + * it must be the only symbol in its rank, so the old rank now has no symbols. + * Otherwise, since the Huffman nodes are sorted by count, the previous position is now + * the smallest node in the rank. If the previous position belongs to a different rank, + * then the rank is now empty. + */ + if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol; + else { + rankLast[nBitsToDecrease]--; + if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease) + rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ + } + } /* while (totalCost > 0) */ + + /* If we've removed too much weight, then we have to add it back. + * To avoid overshooting again, we only adjust the smallest rank. + * We take the largest nodes from the lowest rank 0 and move them + * to rank 1. There's guaranteed to be enough rank 0 symbols because + * TODO. + */ + while (totalCost < 0) { /* Sometimes, cost correction overshoot */ + /* special case : no rank 1 symbol (using maxNbBits-1); + * let's create one from largest rank 0 (using maxNbBits). + */ + if (rankLast[1] == noSymbol) { + while (huffNode[n].nbBits == maxNbBits) n--; + huffNode[n+1].nbBits--; + assert(n >= 0); + rankLast[1] = (U32)(n+1); + totalCost++; + continue; + } + huffNode[ rankLast[1] + 1 ].nbBits--; + rankLast[1]++; + totalCost ++; + } + } /* repay normalized cost */ + } /* there are several too large elements (at least >= 2) */ + + return maxNbBits; +} + +typedef struct { + U32 base; + U32 curr; +} rankPos; + +typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32]; + +#define RANK_POSITION_TABLE_SIZE 32 + +typedef struct { + huffNodeTable huffNodeTbl; + rankPos rankPosition[RANK_POSITION_TABLE_SIZE]; +} HUF_buildCTable_wksp_tables; + +/** + * HUF_sort(): + * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order. + * + * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled. + * Must have (maxSymbolValue + 1) entries. + * @param[in] count Histogram of the symbols. + * @param[in] maxSymbolValue Maximum symbol value. + * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries. + */ +static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition) +{ + int n; + int const maxSymbolValue1 = (int)maxSymbolValue + 1; + + /* Compute base and set curr to base. + * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1. + * Then 2^lowerRank <= count[n]+1 <= 2^rank. + * We attribute each symbol to lowerRank's base value, because we want to know where + * each rank begins in the output, so for rank R we want to count ranks R+1 and above. + */ + ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE); + for (n = 0; n < maxSymbolValue1; ++n) { + U32 lowerRank = BIT_highbit32(count[n] + 1); + rankPosition[lowerRank].base++; + } + assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0); + for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) { + rankPosition[n-1].base += rankPosition[n].base; + rankPosition[n-1].curr = rankPosition[n-1].base; + } + /* Sort */ + for (n = 0; n < maxSymbolValue1; ++n) { + U32 const c = count[n]; + U32 const r = BIT_highbit32(c+1) + 1; + U32 pos = rankPosition[r].curr++; + /* Insert into the correct position in the rank. + * We have at most 256 symbols, so this insertion should be fine. + */ + while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) { + huffNode[pos] = huffNode[pos-1]; + pos--; + } + huffNode[pos].count = c; + huffNode[pos].byte = (BYTE)n; + } +} + + +/** HUF_buildCTable_wksp() : + * Same as HUF_buildCTable(), but using externally allocated scratch buffer. + * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as sizeof(HUF_buildCTable_wksp_tables). + */ +#define STARTNODE (HUF_SYMBOLVALUE_MAX+1) + +/* HUF_buildTree(): + * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree. + * + * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array. + * @param maxSymbolValue The maximum symbol value. + * @return The smallest node in the Huffman tree (by count). + */ +static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue) +{ + nodeElt* const huffNode0 = huffNode - 1; + int nonNullRank; + int lowS, lowN; + int nodeNb = STARTNODE; + int n, nodeRoot; + /* init for parents */ + nonNullRank = (int)maxSymbolValue; + while(huffNode[nonNullRank].count == 0) nonNullRank--; + lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb; + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count; + huffNode[lowS].parent = huffNode[lowS-1].parent = (U16)nodeNb; + nodeNb++; lowS-=2; + for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); + huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ + + /* create parents */ + while (nodeNb <= nodeRoot) { + int const n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + int const n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; + huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; + huffNode[n1].parent = huffNode[n2].parent = (U16)nodeNb; + nodeNb++; + } + + /* distribute weights (unlimited tree height) */ + huffNode[nodeRoot].nbBits = 0; + for (n=nodeRoot-1; n>=STARTNODE; n--) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + for (n=0; n<=nonNullRank; n++) + huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1; + + return nonNullRank; +} + +/** + * HUF_buildCTableFromTree(): + * Build the CTable given the Huffman tree in huffNode. + * + * @param[out] CTable The output Huffman CTable. + * @param huffNode The Huffman tree. + * @param nonNullRank The last and smallest node in the Huffman tree. + * @param maxSymbolValue The maximum symbol value. + * @param maxNbBits The exact maximum number of bits used in the Huffman tree. + */ +static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits) +{ + /* fill result into ctable (val, nbBits) */ + int n; + U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0}; + U16 valPerRank[HUF_TABLELOG_MAX+1] = {0}; + int const alphabetSize = (int)(maxSymbolValue + 1); + for (n=0; n<=nonNullRank; n++) + nbPerRank[huffNode[n].nbBits]++; + /* determine starting value per rank */ + { U16 min = 0; + for (n=(int)maxNbBits; n>0; n--) { + valPerRank[n] = min; /* get starting value within each rank */ + min += nbPerRank[n]; + min >>= 1; + } } + for (n=0; nhuffNodeTbl; + nodeElt* const huffNode = huffNode0+1; + int nonNullRank; + + /* safety checks */ + if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */ + if (wkspSize < sizeof(HUF_buildCTable_wksp_tables)) + return ERROR(workSpace_tooSmall); + if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) + return ERROR(maxSymbolValue_tooLarge); + ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable)); + + /* sort, decreasing order */ + HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition); + + /* build tree */ + nonNullRank = HUF_buildTree(huffNode, maxSymbolValue); + + /* enforce maxTableLog */ + maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits); + if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */ + + HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits); + + return maxNbBits; +} + +size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) +{ + size_t nbBits = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + nbBits += CTable[s].nbBits * count[s]; + } + return nbBits >> 3; +} + +int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) { + int bad = 0; + int s; + for (s = 0; s <= (int)maxSymbolValue; ++s) { + bad |= (count[s] != 0) & (CTable[s].nbBits == 0); + } + return !bad; +} + +size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } + +FORCE_INLINE_TEMPLATE void +HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) +{ + BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); +} + +#define HUF_FLUSHBITS(s) BIT_flushBits(s) + +#define HUF_FLUSHBITS_1(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream) + +#define HUF_FLUSHBITS_2(stream) \ + if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream) + +FORCE_INLINE_TEMPLATE size_t +HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + const BYTE* ip = (const BYTE*) src; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + size_t n; + BIT_CStream_t bitC; + + /* init */ + if (dstSize < 8) return 0; /* not enough space to compress */ + { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op)); + if (HUF_isError(initErr)) return 0; } + + n = srcSize & ~3; /* join to mod 4 */ + switch (srcSize & 3) + { + case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable); + HUF_FLUSHBITS_2(&bitC); + /* fall-through */ + case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable); + HUF_FLUSHBITS_1(&bitC); + /* fall-through */ + case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable); + HUF_FLUSHBITS(&bitC); + /* fall-through */ + case 0 : /* fall-through */ + default: break; + } + + for (; n>0; n-=4) { /* note : n&3==0 at this stage */ + HUF_encodeSymbol(&bitC, ip[n- 1], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 2], CTable); + HUF_FLUSHBITS_2(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 3], CTable); + HUF_FLUSHBITS_1(&bitC); + HUF_encodeSymbol(&bitC, ip[n- 4], CTable); + HUF_FLUSHBITS(&bitC); + } + + return BIT_closeCStream(&bitC); +} + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + if (bmi2) { + return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable); + } + return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable); +} + +#else + +static size_t +HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, const int bmi2) +{ + (void)bmi2; + return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable); +} + +#endif + +size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + + +static size_t +HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize, + const void* src, size_t srcSize, + const HUF_CElt* CTable, int bmi2) +{ + size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */ + if (srcSize < 12) return 0; /* no saving possible : too small input */ + op += 6; /* jumpTable */ + + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+2, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) ); + if (cSize==0) return 0; + assert(cSize <= 65535); + MEM_writeLE16(ostart+4, (U16)cSize); + op += cSize; + } + + ip += segmentSize; + assert(op <= oend); + assert(ip <= iend); + { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) ); + if (cSize==0) return 0; + op += cSize; + } + + return (size_t)(op-ostart); +} + +size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable) +{ + return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0); +} + +typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e; + +static size_t HUF_compressCTable_internal( + BYTE* const ostart, BYTE* op, BYTE* const oend, + const void* src, size_t srcSize, + HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2) +{ + size_t const cSize = (nbStreams==HUF_singleStream) ? + HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) : + HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2); + if (HUF_isError(cSize)) { return cSize; } + if (cSize==0) { return 0; } /* uncompressible */ + op += cSize; + /* check compressibility */ + assert(op >= ostart); + if ((size_t)(op-ostart) >= srcSize-1) { return 0; } + return (size_t)(op-ostart); +} + +typedef struct { + unsigned count[HUF_SYMBOLVALUE_MAX + 1]; + HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1]; + union { + HUF_buildCTable_wksp_tables buildCTable_wksp; + HUF_WriteCTableWksp writeCTable_wksp; + } wksps; +} HUF_compress_tables_t; + +/* HUF_compress_internal() : + * `workSpace_align4` must be aligned on 4-bytes boundaries, + * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */ +static size_t +HUF_compress_internal (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + HUF_nbStreams_e nbStreams, + void* workSpace_align4, size_t wkspSize, + HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat, + const int bmi2) +{ + HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart; + + HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE); + assert(((size_t)workSpace_align4 & 3) == 0); /* must be aligned on 4-bytes boundaries */ + + /* checks & inits */ + if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall); + if (!srcSize) return 0; /* Uncompressed */ + if (!dstSize) return 0; /* cannot fit anything within dst budget */ + if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ + if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); + if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX; + if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; + + /* Heuristic : If old table is valid, use it for small inputs */ + if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Scan input and build symbol stats */ + { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) ); + if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ + if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */ + } + + /* Check validity of previous table */ + if ( repeat + && *repeat == HUF_repeat_check + && !HUF_validateCTable(oldHufTable, table->count, maxSymbolValue)) { + *repeat = HUF_repeat_none; + } + /* Heuristic : use existing table for small inputs */ + if (preferRepeat && repeat && *repeat != HUF_repeat_none) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } + + /* Build Huffman Tree */ + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count, + maxSymbolValue, huffLog, + &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp)); + CHECK_F(maxBits); + huffLog = (U32)maxBits; + /* Zero unused symbols in CTable, so we can check it for validity */ + ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0, + sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt))); + } + + /* Write table description header */ + { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog, + &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) ); + /* Check if using previous huffman table is beneficial */ + if (repeat && *repeat != HUF_repeat_none) { + size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue); + size_t const newSize = HUF_estimateCompressedSize(table->CTable, table->count, maxSymbolValue); + if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, oldHufTable, bmi2); + } } + + /* Use the new huffman table */ + if (hSize + 12ul >= srcSize) { return 0; } + op += hSize; + if (repeat) { *repeat = HUF_repeat_none; } + if (oldHufTable) + ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */ + } + return HUF_compressCTable_internal(ostart, op, oend, + src, srcSize, + nbStreams, table->CTable, bmi2); +} + + +size_t HUF_compress1X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +size_t HUF_compress1X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_singleStream, + workSpace, wkspSize, hufTable, + repeat, preferRepeat, bmi2); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * provide workspace to generate compression tables */ +size_t HUF_compress4X_wksp (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + NULL, NULL, 0, 0 /*bmi2*/); +} + +/* HUF_compress4X_repeat(): + * compress input using 4 streams. + * re-use an existing huffman compression table */ +size_t HUF_compress4X_repeat (void* dst, size_t dstSize, + const void* src, size_t srcSize, + unsigned maxSymbolValue, unsigned huffLog, + void* workSpace, size_t wkspSize, + HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2) +{ + return HUF_compress_internal(dst, dstSize, src, srcSize, + maxSymbolValue, huffLog, HUF_fourStreams, + workSpace, wkspSize, + hufTable, repeat, preferRepeat, bmi2); +} + diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c new file mode 100644 index 000000000000..78aa14c50dd2 --- /dev/null +++ b/lib/zstd/compress/zstd_compress.c @@ -0,0 +1,5105 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_deps.h" /* INT_MAX, ZSTD_memset, ZSTD_memcpy */ +#include "../common/cpu.h" +#include "../common/mem.h" +#include "hist.h" /* HIST_countFast_wksp */ +#define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */ +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" +#include "zstd_fast.h" +#include "zstd_double_fast.h" +#include "zstd_lazy.h" +#include "zstd_opt.h" +#include "zstd_ldm.h" +#include "zstd_compress_superblock.h" + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * COMPRESS_HEAPMODE : + * Select how default decompression function ZSTD_compress() allocates its context, + * on stack (0, default), or into heap (1). + * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected. + */ + + +/*-************************************* +* Helper functions +***************************************/ +/* ZSTD_compressBound() + * Note that the result from this function is only compatible with the "normal" + * full-block strategy. + * When there are a lot of small blocks due to frequent flush in streaming mode + * the overhead of headers can make the compressed data to be larger than the + * return value of ZSTD_compressBound(). + */ +size_t ZSTD_compressBound(size_t srcSize) { + return ZSTD_COMPRESSBOUND(srcSize); +} + + +/*-************************************* +* Context memory management +***************************************/ +struct ZSTD_CDict_s { + const void* dictContent; + size_t dictContentSize; + ZSTD_dictContentType_e dictContentType; /* The dictContentType the CDict was created with */ + U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ + ZSTD_cwksp workspace; + ZSTD_matchState_t matchState; + ZSTD_compressedBlockState_t cBlockState; + ZSTD_customMem customMem; + U32 dictID; + int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */ +}; /* typedef'd to ZSTD_CDict within "zstd.h" */ + +ZSTD_CCtx* ZSTD_createCCtx(void) +{ + return ZSTD_createCCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager) +{ + assert(cctx != NULL); + ZSTD_memset(cctx, 0, sizeof(*cctx)); + cctx->customMem = memManager; + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + { size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters); + assert(!ZSTD_isError(err)); + (void)err; + } +} + +ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem) +{ + ZSTD_STATIC_ASSERT(zcss_init==0); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1)); + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_customMalloc(sizeof(ZSTD_CCtx), customMem); + if (!cctx) return NULL; + ZSTD_initCCtx(cctx, customMem); + return cctx; + } +} + +ZSTD_CCtx* ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize) +{ + ZSTD_cwksp ws; + ZSTD_CCtx* cctx; + if (workspaceSize <= sizeof(ZSTD_CCtx)) return NULL; /* minimum size */ + if ((size_t)workspace & 7) return NULL; /* must be 8-aligned */ + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + + cctx = (ZSTD_CCtx*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CCtx)); + if (cctx == NULL) return NULL; + + ZSTD_memset(cctx, 0, sizeof(ZSTD_CCtx)); + ZSTD_cwksp_move(&cctx->workspace, &ws); + cctx->staticSize = workspaceSize; + + /* statically sized space. entropyWorkspace never moves (but prev/next block swap places) */ + if (!ZSTD_cwksp_check_available(&cctx->workspace, ENTROPY_WORKSPACE_SIZE + 2 * sizeof(ZSTD_compressedBlockState_t))) return NULL; + cctx->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->blockState.nextCBlock = (ZSTD_compressedBlockState_t*)ZSTD_cwksp_reserve_object(&cctx->workspace, sizeof(ZSTD_compressedBlockState_t)); + cctx->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cctx->workspace, ENTROPY_WORKSPACE_SIZE); + cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + return cctx; +} + +/** + * Clears and frees all of the dictionaries in the CCtx. + */ +static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) +{ + ZSTD_customFree(cctx->localDict.dictBuffer, cctx->customMem); + ZSTD_freeCDict(cctx->localDict.cdict); + ZSTD_memset(&cctx->localDict, 0, sizeof(cctx->localDict)); + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); + cctx->cdict = NULL; +} + +static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) +{ + size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; + size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); + return bufferSize + cdictSize; +} + +static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) +{ + assert(cctx != NULL); + assert(cctx->staticSize == 0); + ZSTD_clearAllDicts(cctx); + ZSTD_cwksp_free(&cctx->workspace, cctx->customMem); +} + +size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "not compatible with static CCtx"); + { + int cctxInWorkspace = ZSTD_cwksp_owns_buffer(&cctx->workspace, cctx); + ZSTD_freeCCtxContent(cctx); + if (!cctxInWorkspace) { + ZSTD_customFree(cctx, cctx->customMem); + } + } + return 0; +} + + +static size_t ZSTD_sizeof_mtctx(const ZSTD_CCtx* cctx) +{ + (void)cctx; + return 0; +} + + +size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx) +{ + if (cctx==NULL) return 0; /* support sizeof on NULL */ + /* cctx may be in the workspace */ + return (cctx->workspace.workspace == cctx ? 0 : sizeof(*cctx)) + + ZSTD_cwksp_sizeof(&cctx->workspace) + + ZSTD_sizeof_localDict(cctx->localDict) + + ZSTD_sizeof_mtctx(cctx); +} + +size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs) +{ + return ZSTD_sizeof_CCtx(zcs); /* same object */ +} + +/* private API call, for dictBuilder only */ +const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); } + +/* Returns 1 if compression parameters are such that we should + * enable long distance matching (wlog >= 27, strategy >= btopt). + * Returns 0 otherwise. + */ +static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) { + return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27; +} + +static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams( + ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params cctxParams; + /* should not matter, as all cParams are presumed properly defined */ + ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT); + cctxParams.cParams = cParams; + + if (ZSTD_CParams_shouldEnableLdm(&cParams)) { + DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params"); + cctxParams.ldmParams.enableLdm = 1; + /* LDM is enabled by default for optimal parser and window size >= 128MB */ + ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams); + assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog); + assert(cctxParams.ldmParams.hashRateLog < 32); + } + + assert(!ZSTD_checkCParams(cParams)); + return cctxParams; +} + +static ZSTD_CCtx_params* ZSTD_createCCtxParams_advanced( + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params* params; + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + params = (ZSTD_CCtx_params*)ZSTD_customCalloc( + sizeof(ZSTD_CCtx_params), customMem); + if (!params) { return NULL; } + ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); + params->customMem = customMem; + return params; +} + +ZSTD_CCtx_params* ZSTD_createCCtxParams(void) +{ + return ZSTD_createCCtxParams_advanced(ZSTD_defaultCMem); +} + +size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params) +{ + if (params == NULL) { return 0; } + ZSTD_customFree(params, params->customMem); + return 0; +} + +size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params) +{ + return ZSTD_CCtxParams_init(params, ZSTD_CLEVEL_DEFAULT); +} + +size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->compressionLevel = compressionLevel; + cctxParams->fParams.contentSizeFlag = 1; + return 0; +} + +#define ZSTD_NO_CLEVEL 0 + +/** + * Initializes the cctxParams from params and compressionLevel. + * @param compressionLevel If params are derived from a compression level then that compression level, otherwise ZSTD_NO_CLEVEL. + */ +static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_parameters const* params, int compressionLevel) +{ + assert(!ZSTD_checkCParams(params->cParams)); + ZSTD_memset(cctxParams, 0, sizeof(*cctxParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = compressionLevel; +} + +size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) +{ + RETURN_ERROR_IF(!cctxParams, GENERIC, "NULL pointer!"); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_init_internal(cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return 0; +} + +/** + * Sets cctxParams' cParams and fParams from params, but otherwise leaves them alone. + * @param param Validated zstd parameters. + */ +static void ZSTD_CCtxParams_setZstdParams( + ZSTD_CCtx_params* cctxParams, const ZSTD_parameters* params) +{ + assert(!ZSTD_checkCParams(params->cParams)); + cctxParams->cParams = params->cParams; + cctxParams->fParams = params->fParams; + /* Should not matter, as all cParams are presumed properly defined. + * But, set it for tracing anyway. + */ + cctxParams->compressionLevel = ZSTD_NO_CLEVEL; +} + +ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + + switch(param) + { + case ZSTD_c_compressionLevel: + bounds.lowerBound = ZSTD_minCLevel(); + bounds.upperBound = ZSTD_maxCLevel(); + return bounds; + + case ZSTD_c_windowLog: + bounds.lowerBound = ZSTD_WINDOWLOG_MIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + + case ZSTD_c_hashLog: + bounds.lowerBound = ZSTD_HASHLOG_MIN; + bounds.upperBound = ZSTD_HASHLOG_MAX; + return bounds; + + case ZSTD_c_chainLog: + bounds.lowerBound = ZSTD_CHAINLOG_MIN; + bounds.upperBound = ZSTD_CHAINLOG_MAX; + return bounds; + + case ZSTD_c_searchLog: + bounds.lowerBound = ZSTD_SEARCHLOG_MIN; + bounds.upperBound = ZSTD_SEARCHLOG_MAX; + return bounds; + + case ZSTD_c_minMatch: + bounds.lowerBound = ZSTD_MINMATCH_MIN; + bounds.upperBound = ZSTD_MINMATCH_MAX; + return bounds; + + case ZSTD_c_targetLength: + bounds.lowerBound = ZSTD_TARGETLENGTH_MIN; + bounds.upperBound = ZSTD_TARGETLENGTH_MAX; + return bounds; + + case ZSTD_c_strategy: + bounds.lowerBound = ZSTD_STRATEGY_MIN; + bounds.upperBound = ZSTD_STRATEGY_MAX; + return bounds; + + case ZSTD_c_contentSizeFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_checksumFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_dictIDFlag: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_nbWorkers: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_jobSize: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_overlapLog: + bounds.lowerBound = 0; + bounds.upperBound = 0; + return bounds; + + case ZSTD_c_enableDedicatedDictSearch: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_enableLongDistanceMatching: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_ldmHashLog: + bounds.lowerBound = ZSTD_LDM_HASHLOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHLOG_MAX; + return bounds; + + case ZSTD_c_ldmMinMatch: + bounds.lowerBound = ZSTD_LDM_MINMATCH_MIN; + bounds.upperBound = ZSTD_LDM_MINMATCH_MAX; + return bounds; + + case ZSTD_c_ldmBucketSizeLog: + bounds.lowerBound = ZSTD_LDM_BUCKETSIZELOG_MIN; + bounds.upperBound = ZSTD_LDM_BUCKETSIZELOG_MAX; + return bounds; + + case ZSTD_c_ldmHashRateLog: + bounds.lowerBound = ZSTD_LDM_HASHRATELOG_MIN; + bounds.upperBound = ZSTD_LDM_HASHRATELOG_MAX; + return bounds; + + /* experimental parameters */ + case ZSTD_c_rsyncable: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_forceMaxWindow : + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + case ZSTD_c_format: + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + bounds.lowerBound = ZSTD_f_zstd1; + bounds.upperBound = ZSTD_f_zstd1_magicless; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_forceAttachDict: + ZSTD_STATIC_ASSERT(ZSTD_dictDefaultAttach < ZSTD_dictForceLoad); + bounds.lowerBound = ZSTD_dictDefaultAttach; + bounds.upperBound = ZSTD_dictForceLoad; /* note : how to ensure at compile time that this is the highest value enum ? */ + return bounds; + + case ZSTD_c_literalCompressionMode: + ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); + bounds.lowerBound = ZSTD_lcm_auto; + bounds.upperBound = ZSTD_lcm_uncompressed; + return bounds; + + case ZSTD_c_targetCBlockSize: + bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; + bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; + return bounds; + + case ZSTD_c_srcSizeHint: + bounds.lowerBound = ZSTD_SRCSIZEHINT_MIN; + bounds.upperBound = ZSTD_SRCSIZEHINT_MAX; + return bounds; + + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + + case ZSTD_c_blockDelimiters: + bounds.lowerBound = (int)ZSTD_sf_noBlockDelimiters; + bounds.upperBound = (int)ZSTD_sf_explicitBlockDelimiters; + return bounds; + + case ZSTD_c_validateSequences: + bounds.lowerBound = 0; + bounds.upperBound = 1; + return bounds; + + default: + bounds.error = ERROR(parameter_unsupported); + return bounds; + } +} + +/* ZSTD_cParam_clampBounds: + * Clamps the value into the bounded range. + */ +static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return bounds.error; + if (*value < bounds.lowerBound) *value = bounds.lowerBound; + if (*value > bounds.upperBound) *value = bounds.upperBound; + return 0; +} + +#define BOUNDCHECK(cParam, val) { \ + RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ + parameter_outOfBound, "Param out of bounds"); \ +} + + +static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) +{ + switch(param) + { + case ZSTD_c_compressionLevel: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + return 1; + + case ZSTD_c_format: + case ZSTD_c_windowLog: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow : + case ZSTD_c_nbWorkers: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + default: + return 0; + } +} + +size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParameter (%i, %i)", (int)param, value); + if (cctx->streamStage != zcss_init) { + if (ZSTD_isUpdateAuthorized(param)) { + cctx->cParamsChanged = 1; + } else { + RETURN_ERROR(stage_wrong, "can only set params in ctx init stage"); + } } + + switch(param) + { + case ZSTD_c_nbWorkers: + RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, + "MT not compatible with static alloc"); + break; + + case ZSTD_c_compressionLevel: + case ZSTD_c_windowLog: + case ZSTD_c_hashLog: + case ZSTD_c_chainLog: + case ZSTD_c_searchLog: + case ZSTD_c_minMatch: + case ZSTD_c_targetLength: + case ZSTD_c_strategy: + case ZSTD_c_ldmHashRateLog: + case ZSTD_c_format: + case ZSTD_c_contentSizeFlag: + case ZSTD_c_checksumFlag: + case ZSTD_c_dictIDFlag: + case ZSTD_c_forceMaxWindow: + case ZSTD_c_forceAttachDict: + case ZSTD_c_literalCompressionMode: + case ZSTD_c_jobSize: + case ZSTD_c_overlapLog: + case ZSTD_c_rsyncable: + case ZSTD_c_enableDedicatedDictSearch: + case ZSTD_c_enableLongDistanceMatching: + case ZSTD_c_ldmHashLog: + case ZSTD_c_ldmMinMatch: + case ZSTD_c_ldmBucketSizeLog: + case ZSTD_c_targetCBlockSize: + case ZSTD_c_srcSizeHint: + case ZSTD_c_stableInBuffer: + case ZSTD_c_stableOutBuffer: + case ZSTD_c_blockDelimiters: + case ZSTD_c_validateSequences: + break; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, + ZSTD_cParameter param, int value) +{ + DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); + switch(param) + { + case ZSTD_c_format : + BOUNDCHECK(ZSTD_c_format, value); + CCtxParams->format = (ZSTD_format_e)value; + return (size_t)CCtxParams->format; + + case ZSTD_c_compressionLevel : { + FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), ""); + if (value == 0) + CCtxParams->compressionLevel = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else + CCtxParams->compressionLevel = value; + if (CCtxParams->compressionLevel >= 0) return (size_t)CCtxParams->compressionLevel; + return 0; /* return type (size_t) cannot represent negative values */ + } + + case ZSTD_c_windowLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_windowLog, value); + CCtxParams->cParams.windowLog = (U32)value; + return CCtxParams->cParams.windowLog; + + case ZSTD_c_hashLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_hashLog, value); + CCtxParams->cParams.hashLog = (U32)value; + return CCtxParams->cParams.hashLog; + + case ZSTD_c_chainLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_chainLog, value); + CCtxParams->cParams.chainLog = (U32)value; + return CCtxParams->cParams.chainLog; + + case ZSTD_c_searchLog : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_searchLog, value); + CCtxParams->cParams.searchLog = (U32)value; + return (size_t)value; + + case ZSTD_c_minMatch : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_minMatch, value); + CCtxParams->cParams.minMatch = value; + return CCtxParams->cParams.minMatch; + + case ZSTD_c_targetLength : + BOUNDCHECK(ZSTD_c_targetLength, value); + CCtxParams->cParams.targetLength = value; + return CCtxParams->cParams.targetLength; + + case ZSTD_c_strategy : + if (value!=0) /* 0 => use default */ + BOUNDCHECK(ZSTD_c_strategy, value); + CCtxParams->cParams.strategy = (ZSTD_strategy)value; + return (size_t)CCtxParams->cParams.strategy; + + case ZSTD_c_contentSizeFlag : + /* Content size written in frame header _when known_ (default:1) */ + DEBUGLOG(4, "set content size flag = %u", (value!=0)); + CCtxParams->fParams.contentSizeFlag = value != 0; + return CCtxParams->fParams.contentSizeFlag; + + case ZSTD_c_checksumFlag : + /* A 32-bits content checksum will be calculated and written at end of frame (default:0) */ + CCtxParams->fParams.checksumFlag = value != 0; + return CCtxParams->fParams.checksumFlag; + + case ZSTD_c_dictIDFlag : /* When applicable, dictionary's dictID is provided in frame header (default:1) */ + DEBUGLOG(4, "set dictIDFlag = %u", (value!=0)); + CCtxParams->fParams.noDictIDFlag = !value; + return !CCtxParams->fParams.noDictIDFlag; + + case ZSTD_c_forceMaxWindow : + CCtxParams->forceWindow = (value != 0); + return CCtxParams->forceWindow; + + case ZSTD_c_forceAttachDict : { + const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value; + BOUNDCHECK(ZSTD_c_forceAttachDict, pref); + CCtxParams->attachDictPref = pref; + return CCtxParams->attachDictPref; + } + + case ZSTD_c_literalCompressionMode : { + const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; + BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); + CCtxParams->literalCompressionMode = lcm; + return CCtxParams->literalCompressionMode; + } + + case ZSTD_c_nbWorkers : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_jobSize : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_overlapLog : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_rsyncable : + RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); + return 0; + + case ZSTD_c_enableDedicatedDictSearch : + CCtxParams->enableDedicatedDictSearch = (value!=0); + return CCtxParams->enableDedicatedDictSearch; + + case ZSTD_c_enableLongDistanceMatching : + CCtxParams->ldmParams.enableLdm = (value!=0); + return CCtxParams->ldmParams.enableLdm; + + case ZSTD_c_ldmHashLog : + if (value!=0) /* 0 ==> auto */ + BOUNDCHECK(ZSTD_c_ldmHashLog, value); + CCtxParams->ldmParams.hashLog = value; + return CCtxParams->ldmParams.hashLog; + + case ZSTD_c_ldmMinMatch : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmMinMatch, value); + CCtxParams->ldmParams.minMatchLength = value; + return CCtxParams->ldmParams.minMatchLength; + + case ZSTD_c_ldmBucketSizeLog : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_ldmBucketSizeLog, value); + CCtxParams->ldmParams.bucketSizeLog = value; + return CCtxParams->ldmParams.bucketSizeLog; + + case ZSTD_c_ldmHashRateLog : + RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, + parameter_outOfBound, "Param out of bounds!"); + CCtxParams->ldmParams.hashRateLog = value; + return CCtxParams->ldmParams.hashRateLog; + + case ZSTD_c_targetCBlockSize : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_targetCBlockSize, value); + CCtxParams->targetCBlockSize = value; + return CCtxParams->targetCBlockSize; + + case ZSTD_c_srcSizeHint : + if (value!=0) /* 0 ==> default */ + BOUNDCHECK(ZSTD_c_srcSizeHint, value); + CCtxParams->srcSizeHint = value; + return CCtxParams->srcSizeHint; + + case ZSTD_c_stableInBuffer: + BOUNDCHECK(ZSTD_c_stableInBuffer, value); + CCtxParams->inBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->inBufferMode; + + case ZSTD_c_stableOutBuffer: + BOUNDCHECK(ZSTD_c_stableOutBuffer, value); + CCtxParams->outBufferMode = (ZSTD_bufferMode_e)value; + return CCtxParams->outBufferMode; + + case ZSTD_c_blockDelimiters: + BOUNDCHECK(ZSTD_c_blockDelimiters, value); + CCtxParams->blockDelimiters = (ZSTD_sequenceFormat_e)value; + return CCtxParams->blockDelimiters; + + case ZSTD_c_validateSequences: + BOUNDCHECK(ZSTD_c_validateSequences, value); + CCtxParams->validateSequences = value; + return CCtxParams->validateSequences; + + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } +} + +size_t ZSTD_CCtx_getParameter(ZSTD_CCtx const* cctx, ZSTD_cParameter param, int* value) +{ + return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); +} + +size_t ZSTD_CCtxParams_getParameter( + ZSTD_CCtx_params const* CCtxParams, ZSTD_cParameter param, int* value) +{ + switch(param) + { + case ZSTD_c_format : + *value = CCtxParams->format; + break; + case ZSTD_c_compressionLevel : + *value = CCtxParams->compressionLevel; + break; + case ZSTD_c_windowLog : + *value = (int)CCtxParams->cParams.windowLog; + break; + case ZSTD_c_hashLog : + *value = (int)CCtxParams->cParams.hashLog; + break; + case ZSTD_c_chainLog : + *value = (int)CCtxParams->cParams.chainLog; + break; + case ZSTD_c_searchLog : + *value = CCtxParams->cParams.searchLog; + break; + case ZSTD_c_minMatch : + *value = CCtxParams->cParams.minMatch; + break; + case ZSTD_c_targetLength : + *value = CCtxParams->cParams.targetLength; + break; + case ZSTD_c_strategy : + *value = (unsigned)CCtxParams->cParams.strategy; + break; + case ZSTD_c_contentSizeFlag : + *value = CCtxParams->fParams.contentSizeFlag; + break; + case ZSTD_c_checksumFlag : + *value = CCtxParams->fParams.checksumFlag; + break; + case ZSTD_c_dictIDFlag : + *value = !CCtxParams->fParams.noDictIDFlag; + break; + case ZSTD_c_forceMaxWindow : + *value = CCtxParams->forceWindow; + break; + case ZSTD_c_forceAttachDict : + *value = CCtxParams->attachDictPref; + break; + case ZSTD_c_literalCompressionMode : + *value = CCtxParams->literalCompressionMode; + break; + case ZSTD_c_nbWorkers : + assert(CCtxParams->nbWorkers == 0); + *value = CCtxParams->nbWorkers; + break; + case ZSTD_c_jobSize : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_overlapLog : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_rsyncable : + RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); + case ZSTD_c_enableDedicatedDictSearch : + *value = CCtxParams->enableDedicatedDictSearch; + break; + case ZSTD_c_enableLongDistanceMatching : + *value = CCtxParams->ldmParams.enableLdm; + break; + case ZSTD_c_ldmHashLog : + *value = CCtxParams->ldmParams.hashLog; + break; + case ZSTD_c_ldmMinMatch : + *value = CCtxParams->ldmParams.minMatchLength; + break; + case ZSTD_c_ldmBucketSizeLog : + *value = CCtxParams->ldmParams.bucketSizeLog; + break; + case ZSTD_c_ldmHashRateLog : + *value = CCtxParams->ldmParams.hashRateLog; + break; + case ZSTD_c_targetCBlockSize : + *value = (int)CCtxParams->targetCBlockSize; + break; + case ZSTD_c_srcSizeHint : + *value = (int)CCtxParams->srcSizeHint; + break; + case ZSTD_c_stableInBuffer : + *value = (int)CCtxParams->inBufferMode; + break; + case ZSTD_c_stableOutBuffer : + *value = (int)CCtxParams->outBufferMode; + break; + case ZSTD_c_blockDelimiters : + *value = (int)CCtxParams->blockDelimiters; + break; + case ZSTD_c_validateSequences : + *value = (int)CCtxParams->validateSequences; + break; + default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); + } + return 0; +} + +/** ZSTD_CCtx_setParametersUsingCCtxParams() : + * just applies `params` into `cctx` + * no action is performed, parameters are merely stored. + * If ZSTDMT is enabled, parameters are pushed to cctx->mtctx. + * This is possible even if a compression is ongoing. + * In which case, new parameters will be applied on the fly, starting with next compression job. + */ +size_t ZSTD_CCtx_setParametersUsingCCtxParams( + ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "The context is in the wrong stage!"); + RETURN_ERROR_IF(cctx->cdict, stage_wrong, + "Can't override parameters with cdict attached (some must " + "be inherited from the cdict)."); + + cctx->requestedParams = *params; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't set pledgedSrcSize when not in init stage."); + cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; + return 0; +} + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams( + int const compressionLevel, + size_t const dictSize); +static int ZSTD_dedicatedDictSearch_isSupported( + const ZSTD_compressionParameters* cParams); +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams); + +/** + * Initializes the local dict using the requested parameters. + * NOTE: This does not use the pledged src size, because it may be used for more + * than one compression. + */ +static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) +{ + ZSTD_localDict* const dl = &cctx->localDict; + if (dl->dict == NULL) { + /* No local dictionary. */ + assert(dl->dictBuffer == NULL); + assert(dl->cdict == NULL); + assert(dl->dictSize == 0); + return 0; + } + if (dl->cdict != NULL) { + assert(cctx->cdict == dl->cdict); + /* Local dictionary already initialized. */ + return 0; + } + assert(dl->dictSize > 0); + assert(cctx->cdict == NULL); + assert(cctx->prefixDict.dict == NULL); + + dl->cdict = ZSTD_createCDict_advanced2( + dl->dict, + dl->dictSize, + ZSTD_dlm_byRef, + dl->dictContentType, + &cctx->requestedParams, + cctx->customMem); + RETURN_ERROR_IF(!dl->cdict, memory_allocation, "ZSTD_createCDict_advanced failed"); + cctx->cdict = dl->cdict; + return 0; +} + +size_t ZSTD_CCtx_loadDictionary_advanced( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't load a dictionary when ctx is not in init stage."); + DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); + ZSTD_clearAllDicts(cctx); /* in case one already exists */ + if (dict == NULL || dictSize == 0) /* no dictionary mode */ + return 0; + if (dictLoadMethod == ZSTD_dlm_byRef) { + cctx->localDict.dict = dict; + } else { + void* dictBuffer; + RETURN_ERROR_IF(cctx->staticSize, memory_allocation, + "no malloc for static CCtx"); + dictBuffer = ZSTD_customMalloc(dictSize, cctx->customMem); + RETURN_ERROR_IF(!dictBuffer, memory_allocation, "NULL pointer!"); + ZSTD_memcpy(dictBuffer, dict, dictSize); + cctx->localDict.dictBuffer = dictBuffer; + cctx->localDict.dict = dictBuffer; + } + cctx->localDict.dictSize = dictSize; + cctx->localDict.dictContentType = dictContentType; + return 0; +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference( + ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize) +{ + return ZSTD_CCtx_loadDictionary_advanced( + cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + + +size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a dict when ctx not in init stage."); + /* Free the existing local cdict (if any) to save memory. */ + ZSTD_clearAllDicts(cctx); + cctx->cdict = cdict; + return 0; +} + +size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a pool when ctx not in init stage."); + cctx->pool = pool; + return 0; +} + +size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_CCtx_refPrefix_advanced(cctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + +size_t ZSTD_CCtx_refPrefix_advanced( + ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't ref a prefix when ctx not in init stage."); + ZSTD_clearAllDicts(cctx); + if (prefix != NULL && prefixSize > 0) { + cctx->prefixDict.dict = prefix; + cctx->prefixDict.dictSize = prefixSize; + cctx->prefixDict.dictContentType = dictContentType; + } + return 0; +} + +/*! ZSTD_CCtx_reset() : + * Also dumps dictionary */ +size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + cctx->streamStage = zcss_init; + cctx->pledgedSrcSizePlusOne = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong, + "Can't reset parameters only when not in init stage."); + ZSTD_clearAllDicts(cctx); + return ZSTD_CCtxParams_reset(&cctx->requestedParams); + } + return 0; +} + + +/** ZSTD_checkCParams() : + control CParam values remain within authorized range. + @return : 0, or an error code if one value is beyond authorized range */ +size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) +{ + BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); + BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); + BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); + BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); + BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); + BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); + BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); + return 0; +} + +/** ZSTD_clampCParams() : + * make CParam values within valid range. + * @return : valid CParams */ +static ZSTD_compressionParameters +ZSTD_clampCParams(ZSTD_compressionParameters cParams) +{ +# define CLAMP_TYPE(cParam, val, type) { \ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); \ + if ((int)valbounds.upperBound) val=(type)bounds.upperBound; \ + } +# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) + CLAMP(ZSTD_c_windowLog, cParams.windowLog); + CLAMP(ZSTD_c_chainLog, cParams.chainLog); + CLAMP(ZSTD_c_hashLog, cParams.hashLog); + CLAMP(ZSTD_c_searchLog, cParams.searchLog); + CLAMP(ZSTD_c_minMatch, cParams.minMatch); + CLAMP(ZSTD_c_targetLength,cParams.targetLength); + CLAMP_TYPE(ZSTD_c_strategy,cParams.strategy, ZSTD_strategy); + return cParams; +} + +/** ZSTD_cycleLog() : + * condition for correct operation : hashLog > 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) +{ + U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); + return hashLog - btScale; +} + +/** ZSTD_dictAndWindowLog() : + * Returns an adjusted window log that is large enough to fit the source and the dictionary. + * The zstd format says that the entire dictionary is valid if one byte of the dictionary + * is within the window. So the hashLog and chainLog should be large enough to reference both + * the dictionary and the window. So we must use this adjusted dictAndWindowLog when downsizing + * the hashLog and windowLog. + * NOTE: srcSize must not be ZSTD_CONTENTSIZE_UNKNOWN. + */ +static U32 ZSTD_dictAndWindowLog(U32 windowLog, U64 srcSize, U64 dictSize) +{ + const U64 maxWindowSize = 1ULL << ZSTD_WINDOWLOG_MAX; + /* No dictionary ==> No change */ + if (dictSize == 0) { + return windowLog; + } + assert(windowLog <= ZSTD_WINDOWLOG_MAX); + assert(srcSize != ZSTD_CONTENTSIZE_UNKNOWN); /* Handled in ZSTD_adjustCParams_internal() */ + { + U64 const windowSize = 1ULL << windowLog; + U64 const dictAndWindowSize = dictSize + windowSize; + /* If the window size is already large enough to fit both the source and the dictionary + * then just use the window size. Otherwise adjust so that it fits the dictionary and + * the window. + */ + if (windowSize >= dictSize + srcSize) { + return windowLog; /* Window size large enough already */ + } else if (dictAndWindowSize >= maxWindowSize) { + return ZSTD_WINDOWLOG_MAX; /* Larger than max window log */ + } else { + return ZSTD_highbit32((U32)dictAndWindowSize - 1) + 1; + } + } +} + +/** ZSTD_adjustCParams_internal() : + * optimize `cPar` for a specified input (`srcSize` and `dictSize`). + * mostly downsize to reduce memory consumption and initialization latency. + * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. + * `mode` is the mode for parameter adjustment. See docs for `ZSTD_cParamMode_e`. + * note : `srcSize==0` means 0! + * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ +static ZSTD_compressionParameters +ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize, + ZSTD_cParamMode_e mode) +{ + const U64 minSrcSize = 513; /* (1<<9) + 1 */ + const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); + assert(ZSTD_checkCParams(cPar)==0); + + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + /* If we don't know the source size, don't make any + * assumptions about it. We will already have selected + * smaller parameters if a dictionary is in use. + */ + break; + case ZSTD_cpm_createCDict: + /* Assume a small source size when creating a dictionary + * with an unkown source size. + */ + if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN) + srcSize = minSrcSize; + break; + case ZSTD_cpm_attachDict: + /* Dictionary has its own dedicated parameters which have + * already been selected. We are selecting parameters + * for only the source. + */ + dictSize = 0; + break; + default: + assert(0); + break; + } + + /* resize windowLog if input is small enough, to use less memory */ + if ( (srcSize < maxWindowResize) + && (dictSize < maxWindowResize) ) { + U32 const tSize = (U32)(srcSize + dictSize); + static U32 const hashSizeMin = 1 << ZSTD_HASHLOG_MIN; + U32 const srcLog = (tSize < hashSizeMin) ? ZSTD_HASHLOG_MIN : + ZSTD_highbit32(tSize-1) + 1; + if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; + } + if (srcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const dictAndWindowLog = ZSTD_dictAndWindowLog(cPar.windowLog, (U64)srcSize, (U64)dictSize); + U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); + if (cPar.hashLog > dictAndWindowLog+1) cPar.hashLog = dictAndWindowLog+1; + if (cycleLog > dictAndWindowLog) + cPar.chainLog -= (cycleLog - dictAndWindowLog); + } + + if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) + cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ + + return cPar; +} + +ZSTD_compressionParameters +ZSTD_adjustCParams(ZSTD_compressionParameters cPar, + unsigned long long srcSize, + size_t dictSize) +{ + cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ + if (srcSize == 0) srcSize = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize, ZSTD_cpm_unknown); +} + +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +static void ZSTD_overrideCParams( + ZSTD_compressionParameters* cParams, + const ZSTD_compressionParameters* overrides) +{ + if (overrides->windowLog) cParams->windowLog = overrides->windowLog; + if (overrides->hashLog) cParams->hashLog = overrides->hashLog; + if (overrides->chainLog) cParams->chainLog = overrides->chainLog; + if (overrides->searchLog) cParams->searchLog = overrides->searchLog; + if (overrides->minMatch) cParams->minMatch = overrides->minMatch; + if (overrides->targetLength) cParams->targetLength = overrides->targetLength; + if (overrides->strategy) cParams->strategy = overrides->strategy; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + ZSTD_compressionParameters cParams; + if (srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN && CCtxParams->srcSizeHint > 0) { + srcSizeHint = CCtxParams->srcSizeHint; + } + cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode); + if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG; + ZSTD_overrideCParams(&cParams, &CCtxParams->cParams); + assert(!ZSTD_checkCParams(cParams)); + /* srcSizeHint == 0 means 0 */ + return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize, mode); +} + +static size_t +ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams, + const U32 forCCtx) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + /* We don't use ZSTD_cwksp_alloc_size() here because the tables aren't + * surrounded by redzones in ASAN. */ + size_t const tableSpace = chainSize * sizeof(U32) + + hSize * sizeof(U32) + + h3Size * sizeof(U32); + size_t const optPotentialSpace = + ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32)) + + ZSTD_cwksp_alloc_size((1<strategy >= ZSTD_btopt)) + ? optPotentialSpace + : 0; + DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u", + (U32)chainSize, (U32)hSize, (U32)h3Size); + return tableSpace + optSpace; +} + +static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal( + const ZSTD_compressionParameters* cParams, + const ldmParams_t* ldmParams, + const int isStatic, + const size_t buffInSize, + const size_t buffOutSize, + const U64 pledgedSrcSize) +{ + size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (cParams->minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize) + + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef)) + + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE)); + size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE); + size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t)); + size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1); + + size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams); + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize); + size_t const ldmSeqSpace = ldmParams->enableLdm ? + ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0; + + + size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize) + + ZSTD_cwksp_alloc_size(buffOutSize); + + size_t const cctxSpace = isStatic ? ZSTD_cwksp_alloc_size(sizeof(ZSTD_CCtx)) : 0; + + size_t const neededSpace = + cctxSpace + + entropySpace + + blockStateSpace + + ldmSpace + + ldmSeqSpace + + matchStateSize + + tokenSpace + + bufferSpace; + + DEBUGLOG(5, "estimate workspace : %u", (U32)neededSpace); + return neededSpace; +} + +size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + /* estimateCCtxSize is for one-shot compression. So no buffers should + * be needed. However, we still allocate two 0-sized buffers, which can + * take space under ASAN. */ + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCCtxSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel) +{ + int tier = 0; + size_t largestSize = 0; + static const unsigned long long srcSizeTiers[4] = {16 KB, 128 KB, 256 KB, ZSTD_CONTENTSIZE_UNKNOWN}; + for (; tier < 4; ++tier) { + /* Choose the set of cParams for a given level across all srcSizes that give the largest cctxSize */ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeTiers[tier], 0, ZSTD_cpm_noAttachDict); + largestSize = MAX(ZSTD_estimateCCtxSize_usingCParams(cParams), largestSize); + } + return largestSize; +} + +size_t ZSTD_estimateCCtxSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + /* Ensure monotonically increasing memory usage as compression level increases */ + size_t const newMB = ZSTD_estimateCCtxSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) +{ + RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); + { ZSTD_compressionParameters const cParams = + ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); + size_t const inBuffSize = (params->inBufferMode == ZSTD_bm_buffered) + ? ((size_t)1 << cParams.windowLog) + blockSize + : 0; + size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + + return ZSTD_estimateCCtxSize_usingCCtxParams_internal( + &cParams, ¶ms->ldmParams, 1, inBuffSize, outBuffSize, + ZSTD_CONTENTSIZE_UNKNOWN); + } +} + +size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams) +{ + ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams); + return ZSTD_estimateCStreamSize_usingCCtxParams(¶ms); +} + +static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict); + return ZSTD_estimateCStreamSize_usingCParams(cParams); +} + +size_t ZSTD_estimateCStreamSize(int compressionLevel) +{ + int level; + size_t memBudget = 0; + for (level=MIN(compressionLevel, 1); level<=compressionLevel; level++) { + size_t const newMB = ZSTD_estimateCStreamSize_internal(level); + if (newMB > memBudget) memBudget = newMB; + } + return memBudget; +} + +/* ZSTD_getFrameProgression(): + * tells how much data has been consumed (input) and produced (output) for current frame. + * able to count progression inside worker threads (non-blocking mode). + */ +ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx) +{ + { ZSTD_frameProgression fp; + size_t const buffered = (cctx->inBuff == NULL) ? 0 : + cctx->inBuffPos - cctx->inToCompress; + if (buffered) assert(cctx->inBuffPos >= cctx->inToCompress); + assert(buffered <= ZSTD_BLOCKSIZE_MAX); + fp.ingested = cctx->consumedSrcSize + buffered; + fp.consumed = cctx->consumedSrcSize; + fp.produced = cctx->producedCSize; + fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */ + fp.currentJobID = 0; + fp.nbActiveWorkers = 0; + return fp; +} } + +/*! ZSTD_toFlushNow() + * Only useful for multithreading scenarios currently (nbWorkers >= 1). + */ +size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx) +{ + (void)cctx; + return 0; /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */ +} + +static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1, + ZSTD_compressionParameters cParams2) +{ + (void)cParams1; + (void)cParams2; + assert(cParams1.windowLog == cParams2.windowLog); + assert(cParams1.chainLog == cParams2.chainLog); + assert(cParams1.hashLog == cParams2.hashLog); + assert(cParams1.searchLog == cParams2.searchLog); + assert(cParams1.minMatch == cParams2.minMatch); + assert(cParams1.targetLength == cParams2.targetLength); + assert(cParams1.strategy == cParams2.strategy); +} + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs) +{ + int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + bs->rep[i] = repStartValue[i]; + bs->entropy.huf.repeatMode = HUF_repeat_none; + bs->entropy.fse.offcode_repeatMode = FSE_repeat_none; + bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none; + bs->entropy.fse.litlength_repeatMode = FSE_repeat_none; +} + +/*! ZSTD_invalidateMatchState() + * Invalidate all the matches in the match finder tables. + * Requires nextSrc and base to be set (can be NULL). + */ +static void ZSTD_invalidateMatchState(ZSTD_matchState_t* ms) +{ + ZSTD_window_clear(&ms->window); + + ms->nextToUpdate = ms->window.dictLimit; + ms->loadedDictEnd = 0; + ms->opt.litLengthSum = 0; /* force reset of btopt stats */ + ms->dictMatchState = NULL; +} + +/** + * Controls, for this matchState reset, whether the tables need to be cleared / + * prepared for the coming compression (ZSTDcrp_makeClean), or whether the + * tables can be left unclean (ZSTDcrp_leaveDirty), because we know that a + * subsequent operation will overwrite the table space anyways (e.g., copying + * the matchState contents in from a CDict). + */ +typedef enum { + ZSTDcrp_makeClean, + ZSTDcrp_leaveDirty +} ZSTD_compResetPolicy_e; + +/** + * Controls, for this matchState reset, whether indexing can continue where it + * left off (ZSTDirp_continue), or whether it needs to be restarted from zero + * (ZSTDirp_reset). + */ +typedef enum { + ZSTDirp_continue, + ZSTDirp_reset +} ZSTD_indexResetPolicy_e; + +typedef enum { + ZSTD_resetTarget_CDict, + ZSTD_resetTarget_CCtx +} ZSTD_resetTarget_e; + +static size_t +ZSTD_reset_matchState(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + const ZSTD_compressionParameters* cParams, + const ZSTD_compResetPolicy_e crp, + const ZSTD_indexResetPolicy_e forceResetIndex, + const ZSTD_resetTarget_e forWho) +{ + size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); + size_t const hSize = ((size_t)1) << cParams->hashLog; + U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; + size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0; + + DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset); + if (forceResetIndex == ZSTDirp_reset) { + ZSTD_window_init(&ms->window); + ZSTD_cwksp_mark_tables_dirty(ws); + } + + ms->hashLog3 = hashLog3; + + ZSTD_invalidateMatchState(ms); + + assert(!ZSTD_cwksp_reserve_failed(ws)); /* check that allocation hasn't already failed */ + + ZSTD_cwksp_clear_tables(ws); + + DEBUGLOG(5, "reserving table space"); + /* table Space */ + ms->hashTable = (U32*)ZSTD_cwksp_reserve_table(ws, hSize * sizeof(U32)); + ms->chainTable = (U32*)ZSTD_cwksp_reserve_table(ws, chainSize * sizeof(U32)); + ms->hashTable3 = (U32*)ZSTD_cwksp_reserve_table(ws, h3Size * sizeof(U32)); + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + DEBUGLOG(4, "reset table : %u", crp!=ZSTDcrp_leaveDirty); + if (crp!=ZSTDcrp_leaveDirty) { + /* reset tables only */ + ZSTD_cwksp_clean_tables(ws); + } + + /* opt parser space */ + if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { + DEBUGLOG(4, "reserving optimal parser space"); + ms->opt.litFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (1<opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned)); + ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned)); + ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned)); + ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t)); + ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t)); + } + + ms->cParams = *cParams; + + RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation, + "failed a workspace allocation in ZSTD_reset_matchState"); + + return 0; +} + +/* ZSTD_indexTooCloseToMax() : + * minor optimization : prefer memset() rather than reduceIndex() + * which is measurably slow in some circumstances (reported for Visual Studio). + * Works when re-using a context for a lot of smallish inputs : + * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, + * memset() will be triggered before reduceIndex(). + */ +#define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) +static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) +{ + return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); +} + +/*! ZSTD_resetCCtx_internal() : + note : `params` are assumed fully validated at this stage */ +static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, + ZSTD_CCtx_params params, + U64 const pledgedSrcSize, + ZSTD_compResetPolicy_e const crp, + ZSTD_buffered_policy_e const zbuff) +{ + ZSTD_cwksp* const ws = &zc->workspace; + DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u", + (U32)pledgedSrcSize, params.cParams.windowLog); + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + + zc->isFirstBlock = 1; + + if (params.ldmParams.enableLdm) { + /* Adjust long distance matching parameters */ + ZSTD_ldm_adjustParameters(¶ms.ldmParams, ¶ms.cParams); + assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog); + assert(params.ldmParams.hashRateLog < 32); + } + + { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize)); + size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize); + U32 const divider = (params.cParams.minMatch==3) ? 3 : 4; + size_t const maxNbSeq = blockSize / divider; + size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered) + ? ZSTD_compressBound(blockSize) + 1 + : 0; + size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered) + ? windowSize + blockSize + : 0; + size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize); + + int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window); + ZSTD_indexResetPolicy_e needsIndexReset = + (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset; + + size_t const neededSpace = + ZSTD_estimateCCtxSize_usingCCtxParams_internal( + ¶ms.cParams, ¶ms.ldmParams, zc->staticSize != 0, + buffInSize, buffOutSize, pledgedSrcSize); + FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!"); + + if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0); + + /* Check if workspace is large enough, alloc a new one if needed */ + { + int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace; + int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace); + + DEBUGLOG(4, "Need %zu B workspace", neededSpace); + DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); + + if (workspaceTooSmall || workspaceWasteful) { + DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB", + ZSTD_cwksp_sizeof(ws) >> 10, + neededSpace >> 10); + + RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); + + needsIndexReset = ZSTDirp_reset; + + ZSTD_cwksp_free(ws, zc->customMem); + FORWARD_IF_ERROR(ZSTD_cwksp_create(ws, neededSpace, zc->customMem), ""); + + DEBUGLOG(5, "reserving object space"); + /* Statically sized space. + * entropyWorkspace never moves, + * though prev/next block swap places */ + assert(ZSTD_cwksp_check_available(ws, 2 * sizeof(ZSTD_compressedBlockState_t))); + zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.prevCBlock == NULL, memory_allocation, "couldn't allocate prevCBlock"); + zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t)); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock"); + zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE); + RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace"); + } } + + ZSTD_cwksp_clear(ws); + + /* init params */ + zc->appliedParams = params; + zc->blockState.matchState.cParams = params.cParams; + zc->pledgedSrcSizePlusOne = pledgedSrcSize+1; + zc->consumedSrcSize = 0; + zc->producedCSize = 0; + if (pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN) + zc->appliedParams.fParams.contentSizeFlag = 0; + DEBUGLOG(4, "pledged content size : %u ; flag : %u", + (unsigned)pledgedSrcSize, zc->appliedParams.fParams.contentSizeFlag); + zc->blockSize = blockSize; + + xxh64_reset(&zc->xxhState, 0); + zc->stage = ZSTDcs_init; + zc->dictID = 0; + zc->dictContentSize = 0; + + ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); + + /* ZSTD_wildcopy() is used to copy into the literals buffer, + * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes. + */ + zc->seqStore.litStart = ZSTD_cwksp_reserve_buffer(ws, blockSize + WILDCOPY_OVERLENGTH); + zc->seqStore.maxNbLit = blockSize; + + /* buffers */ + zc->bufferedPolicy = zbuff; + zc->inBuffSize = buffInSize; + zc->inBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffInSize); + zc->outBuffSize = buffOutSize; + zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize); + + /* ldm bucketOffsets table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const numBuckets = + ((size_t)1) << (params.ldmParams.hashLog - + params.ldmParams.bucketSizeLog); + zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets); + ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets); + } + + /* sequences storage */ + ZSTD_referenceExternalSequences(zc, NULL, 0); + zc->seqStore.maxNbSeq = maxNbSeq; + zc->seqStore.llCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.mlCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.ofCode = ZSTD_cwksp_reserve_buffer(ws, maxNbSeq * sizeof(BYTE)); + zc->seqStore.sequencesStart = (seqDef*)ZSTD_cwksp_reserve_aligned(ws, maxNbSeq * sizeof(seqDef)); + + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &zc->blockState.matchState, + ws, + ¶ms.cParams, + crp, + needsIndexReset, + ZSTD_resetTarget_CCtx), ""); + + /* ldm hash table */ + if (params.ldmParams.enableLdm) { + /* TODO: avoid memset? */ + size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog; + zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t)); + ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t)); + zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq)); + zc->maxNbLdmSequences = maxNbLdmSeq; + + ZSTD_window_init(&zc->ldmState.window); + ZSTD_window_clear(&zc->ldmState.window); + zc->ldmState.loadedDictEnd = 0; + } + + /* Due to alignment, when reusing a workspace, we can actually consume + * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h + */ + assert(ZSTD_cwksp_used(ws) >= neededSpace && + ZSTD_cwksp_used(ws) <= neededSpace + 3); + + DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws)); + zc->initialized = 1; + + return 0; + } +} + +/* ZSTD_invalidateRepCodes() : + * ensures next compression will not use repcodes from previous block. + * Note : only works with regular variant; + * do not use with extDict variant ! */ +void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx) { + int i; + for (i=0; iblockState.prevCBlock->rep[i] = 0; + assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window)); +} + +/* These are the approximate sizes for each strategy past which copying the + * dictionary tables into the working context is faster than using them + * in-place. + */ +static const size_t attachDictSizeCutoffs[ZSTD_STRATEGY_MAX+1] = { + 8 KB, /* unused */ + 8 KB, /* ZSTD_fast */ + 16 KB, /* ZSTD_dfast */ + 32 KB, /* ZSTD_greedy */ + 32 KB, /* ZSTD_lazy */ + 32 KB, /* ZSTD_lazy2 */ + 32 KB, /* ZSTD_btlazy2 */ + 32 KB, /* ZSTD_btopt */ + 8 KB, /* ZSTD_btultra */ + 8 KB /* ZSTD_btultra2 */ +}; + +static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize) +{ + size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy]; + int const dedicatedDictSearch = cdict->matchState.dedicatedDictSearch; + return dedicatedDictSearch + || ( ( pledgedSrcSize <= cutoff + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || params->attachDictPref == ZSTD_dictForceAttach ) + && params->attachDictPref != ZSTD_dictForceCopy + && !params->forceWindow ); /* dictMatchState isn't correctly + * handled in _enforceMaxDist */ +} + +static size_t +ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + { + ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams; + unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Resize working context table params for input only, since the dict + * has its own tables. */ + /* pledgedSrcSize == 0 means 0! */ + + if (cdict->matchState.dedicatedDictSearch) { + ZSTD_dedicatedDictSearch_revertCParams(&adjusted_cdict_cParams); + } + + params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize, + cdict->dictContentSize, ZSTD_cpm_attachDict); + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy); + } + + { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc + - cdict->matchState.window.base); + const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; + if (cdictLen == 0) { + /* don't even attach dictionaries with no contents */ + DEBUGLOG(4, "skipping attaching empty dictionary"); + } else { + DEBUGLOG(4, "attaching dictionary into context"); + cctx->blockState.matchState.dictMatchState = &cdict->matchState; + + /* prep working match state so dict matches never have negative indices + * when they are translated to the working context's index space. */ + if (cctx->blockState.matchState.window.dictLimit < cdictEnd) { + cctx->blockState.matchState.window.nextSrc = + cctx->blockState.matchState.window.base + cdictEnd; + ZSTD_window_clear(&cctx->blockState.matchState.window); + } + /* loadedDictEnd is expressed within the referential of the active context */ + cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; + } } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + ZSTD_CCtx_params params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; + + assert(!cdict->matchState.dedicatedDictSearch); + + DEBUGLOG(4, "copying dictionary into context"); + + { unsigned const windowLog = params.cParams.windowLog; + assert(windowLog != 0); + /* Copy only compression parameters related to tables. */ + params.cParams = *cdict_cParams; + params.cParams.windowLog = windowLog; + FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff), ""); + assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); + assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog); + assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog); + } + + ZSTD_cwksp_mark_tables_dirty(&cctx->workspace); + + /* copy tables */ + { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog); + size_t const hSize = (size_t)1 << cdict_cParams->hashLog; + + ZSTD_memcpy(cctx->blockState.matchState.hashTable, + cdict->matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(cctx->blockState.matchState.chainTable, + cdict->matchState.chainTable, + chainSize * sizeof(U32)); + } + + /* Zero the hashTable3, since the cdict never fills it */ + { int const h3log = cctx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + assert(cdict->matchState.hashLog3 == 0); + ZSTD_memset(cctx->blockState.matchState.hashTable3, 0, h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&cctx->workspace); + + /* copy dictionary offsets */ + { ZSTD_matchState_t const* srcMatchState = &cdict->matchState; + ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + + cctx->dictID = cdict->dictID; + cctx->dictContentSize = cdict->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState)); + + return 0; +} + +/* We have a choice between copying the dictionary context into the working + * context, or referencing the dictionary context from the working context + * in-place. We decide here which strategy to use. */ +static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + + DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", + (unsigned)pledgedSrcSize); + + if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) { + return ZSTD_resetCCtx_byAttachingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } else { + return ZSTD_resetCCtx_byCopyingCDict( + cctx, cdict, *params, pledgedSrcSize, zbuff); + } +} + +/*! ZSTD_copyCCtx_internal() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * The "context", in this case, refers to the hash and chain tables, + * entropy tables, and dictionary references. + * `windowLog` value is enforced if != 0, otherwise value is copied from srcCCtx. + * @return : 0, or an error code */ +static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx, + const ZSTD_CCtx* srcCCtx, + ZSTD_frameParameters fParams, + U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(5, "ZSTD_copyCCtx_internal"); + RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong, + "Can't copy a ctx that's not in init stage."); + + ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); + { ZSTD_CCtx_params params = dstCCtx->requestedParams; + /* Copy only compression parameters related to tables. */ + params.cParams = srcCCtx->appliedParams.cParams; + params.fParams = fParams; + ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize, + ZSTDcrp_leaveDirty, zbuff); + assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog); + assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy); + assert(dstCCtx->appliedParams.cParams.hashLog == srcCCtx->appliedParams.cParams.hashLog); + assert(dstCCtx->appliedParams.cParams.chainLog == srcCCtx->appliedParams.cParams.chainLog); + assert(dstCCtx->blockState.matchState.hashLog3 == srcCCtx->blockState.matchState.hashLog3); + } + + ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace); + + /* copy tables */ + { size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog); + size_t const hSize = (size_t)1 << srcCCtx->appliedParams.cParams.hashLog; + int const h3log = srcCCtx->blockState.matchState.hashLog3; + size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0; + + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable, + srcCCtx->blockState.matchState.hashTable, + hSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.chainTable, + srcCCtx->blockState.matchState.chainTable, + chainSize * sizeof(U32)); + ZSTD_memcpy(dstCCtx->blockState.matchState.hashTable3, + srcCCtx->blockState.matchState.hashTable3, + h3Size * sizeof(U32)); + } + + ZSTD_cwksp_mark_tables_clean(&dstCCtx->workspace); + + /* copy dictionary offsets */ + { + const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState; + ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; + dstMatchState->window = srcMatchState->window; + dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; + dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; + } + dstCCtx->dictID = srcCCtx->dictID; + dstCCtx->dictContentSize = srcCCtx->dictContentSize; + + /* copy block state */ + ZSTD_memcpy(dstCCtx->blockState.prevCBlock, srcCCtx->blockState.prevCBlock, sizeof(*srcCCtx->blockState.prevCBlock)); + + return 0; +} + +/*! ZSTD_copyCCtx() : + * Duplicate an existing context `srcCCtx` into another one `dstCCtx`. + * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()). + * pledgedSrcSize==0 means "unknown". +* @return : 0, or an error code */ +size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize) +{ + ZSTD_frameParameters fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + ZSTD_buffered_policy_e const zbuff = srcCCtx->bufferedPolicy; + ZSTD_STATIC_ASSERT((U32)ZSTDb_buffered==1); + if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; + fParams.contentSizeFlag = (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN); + + return ZSTD_copyCCtx_internal(dstCCtx, srcCCtx, + fParams, pledgedSrcSize, + zbuff); +} + + +#define ZSTD_ROWSIZE 16 +/*! ZSTD_reduceTable() : + * reduce table indexes by `reducerValue`, or squash to zero. + * PreserveMark preserves "unsorted mark" for btlazy2 strategy. + * It must be set to a clear 0/1 value, to remove branch during inlining. + * Presume table size is a multiple of ZSTD_ROWSIZE + * to help auto-vectorization */ +FORCE_INLINE_TEMPLATE void +ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerValue, int const preserveMark) +{ + int const nbRows = (int)size / ZSTD_ROWSIZE; + int cellNb = 0; + int rowNb; + assert((size & (ZSTD_ROWSIZE-1)) == 0); /* multiple of ZSTD_ROWSIZE */ + assert(size < (1U<<31)); /* can be casted to int */ + + + for (rowNb=0 ; rowNb < nbRows ; rowNb++) { + int column; + for (column=0; columncParams.hashLog; + ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); + } + + if (params->cParams.strategy != ZSTD_fast) { + U32 const chainSize = (U32)1 << params->cParams.chainLog; + if (params->cParams.strategy == ZSTD_btlazy2) + ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); + else + ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); + } + + if (ms->hashLog3) { + U32 const h3Size = (U32)1 << ms->hashLog3; + ZSTD_reduceTable(ms->hashTable3, h3Size, reducerValue); + } +} + + +/*-******************************************************* +* Block entropic compression +*********************************************************/ + +/* See doc/zstd_compression_format.md for detailed format description */ + +void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) +{ + const seqDef* const sequences = seqStorePtr->sequencesStart; + BYTE* const llCodeTable = seqStorePtr->llCode; + BYTE* const ofCodeTable = seqStorePtr->ofCode; + BYTE* const mlCodeTable = seqStorePtr->mlCode; + U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + U32 u; + assert(nbSeq <= seqStorePtr->maxNbSeq); + for (u=0; ulongLengthID==1) + llCodeTable[seqStorePtr->longLengthPos] = MaxLL; + if (seqStorePtr->longLengthID==2) + mlCodeTable[seqStorePtr->longLengthPos] = MaxML; +} + +/* ZSTD_useTargetCBlockSize(): + * Returns if target compressed block size param is being used. + * If used, compression will do best effort to make a compressed block size to be around targetCBlockSize. + * Returns 1 if true, 0 otherwise. */ +static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams) +{ + DEBUGLOG(5, "ZSTD_useTargetCBlockSize (targetCBlockSize=%zu)", cctxParams->targetCBlockSize); + return (cctxParams->targetCBlockSize != 0); +} + +/* ZSTD_entropyCompressSequences_internal(): + * actually compresses both literals and sequences */ +MEM_STATIC size_t +ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + void* entropyWorkspace, size_t entropyWkspSize, + const int bmi2) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + unsigned* count = (unsigned*)entropyWorkspace; + FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable; + U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */ + const seqDef* const sequences = seqStorePtr->sequencesStart; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + BYTE* seqHead; + BYTE* lastNCount = NULL; + + entropyWorkspace = count + (MaxSeq + 1); + entropyWkspSize -= (MaxSeq + 1) * sizeof(*count); + + DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq); + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= HUF_WORKSPACE_SIZE); + + /* Compress literals */ + { const BYTE* const literals = seqStorePtr->litStart; + size_t const litSize = (size_t)(seqStorePtr->lit - literals); + size_t const cSize = ZSTD_compressLiterals( + &prevEntropy->huf, &nextEntropy->huf, + cctxParams->cParams.strategy, + ZSTD_disableLiteralsCompression(cctxParams), + op, dstCapacity, + literals, litSize, + entropyWorkspace, entropyWkspSize, + bmi2); + FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed"); + assert(cSize <= dstCapacity); + op += cSize; + } + + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, "Can't fit seq hdr in output buf!"); + if (nbSeq < 128) { + *op++ = (BYTE)nbSeq; + } else if (nbSeq < LONGNBSEQ) { + op[0] = (BYTE)((nbSeq>>8) + 0x80); + op[1] = (BYTE)nbSeq; + op+=2; + } else { + op[0]=0xFF; + MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)); + op+=3; + } + assert(op <= oend); + if (nbSeq==0) { + /* Copy the old tables over as if we repeated them */ + ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); + return (size_t)(op - ostart); + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + assert(op <= oend); + + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, + count, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->fse.litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + count, max, llCodeTable, nbSeq, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->fse.litlengthCTable, + sizeof(prevEntropy->fse.litlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for Offsets */ + { unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, + count, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->fse.offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + count, max, ofCodeTable, nbSeq, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->fse.offcodeCTable, + sizeof(prevEntropy->fse.offcodeCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + /* build CTable for MatchLengths */ + { unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp( + count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, + count, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->fse.matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable( + op, (size_t)(oend - op), + CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + count, max, mlCodeTable, nbSeq, + ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->fse.matchlengthCTable, + sizeof(prevEntropy->fse.matchlengthCTable), + entropyWorkspace, entropyWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + lastNCount = op; + op += countSize; + assert(op <= oend); + } } + + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, (size_t)(oend - op), + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + assert(op <= oend); + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ + if (lastNCount && (op - lastNCount) < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(op - lastNCount == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } + } + + DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); + return (size_t)(op - ostart); +} + +MEM_STATIC size_t +ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + size_t srcSize, + void* entropyWorkspace, size_t entropyWkspSize, + int bmi2) +{ + size_t const cSize = ZSTD_entropyCompressSequences_internal( + seqStorePtr, prevEntropy, nextEntropy, cctxParams, + dst, dstCapacity, + entropyWorkspace, entropyWkspSize, bmi2); + if (cSize == 0) return 0; + /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block. + * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block. + */ + if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) + return 0; /* block not compressed */ + FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed"); + + /* Check compressibility */ + { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); + if (cSize >= maxCSize) return 0; /* block not compressed */ + } + DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize); + return cSize; +} + +/* ZSTD_selectBlockCompressor() : + * Not static, but internal use only (used by long distance matcher) + * assumption : strat is a valid strategy */ +ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode) +{ + static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = { + { ZSTD_compressBlock_fast /* default for 0 */, + ZSTD_compressBlock_fast, + ZSTD_compressBlock_doubleFast, + ZSTD_compressBlock_greedy, + ZSTD_compressBlock_lazy, + ZSTD_compressBlock_lazy2, + ZSTD_compressBlock_btlazy2, + ZSTD_compressBlock_btopt, + ZSTD_compressBlock_btultra, + ZSTD_compressBlock_btultra2 }, + { ZSTD_compressBlock_fast_extDict /* default for 0 */, + ZSTD_compressBlock_fast_extDict, + ZSTD_compressBlock_doubleFast_extDict, + ZSTD_compressBlock_greedy_extDict, + ZSTD_compressBlock_lazy_extDict, + ZSTD_compressBlock_lazy2_extDict, + ZSTD_compressBlock_btlazy2_extDict, + ZSTD_compressBlock_btopt_extDict, + ZSTD_compressBlock_btultra_extDict, + ZSTD_compressBlock_btultra_extDict }, + { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */, + ZSTD_compressBlock_fast_dictMatchState, + ZSTD_compressBlock_doubleFast_dictMatchState, + ZSTD_compressBlock_greedy_dictMatchState, + ZSTD_compressBlock_lazy_dictMatchState, + ZSTD_compressBlock_lazy2_dictMatchState, + ZSTD_compressBlock_btlazy2_dictMatchState, + ZSTD_compressBlock_btopt_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState, + ZSTD_compressBlock_btultra_dictMatchState }, + { NULL /* default for 0 */, + NULL, + NULL, + ZSTD_compressBlock_greedy_dedicatedDictSearch, + ZSTD_compressBlock_lazy_dedicatedDictSearch, + ZSTD_compressBlock_lazy2_dedicatedDictSearch, + NULL, + NULL, + NULL, + NULL } + }; + ZSTD_blockCompressor selectedCompressor; + ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1); + + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + selectedCompressor = blockCompressor[(int)dictMode][(int)strat]; + assert(selectedCompressor != NULL); + return selectedCompressor; +} + +static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr, + const BYTE* anchor, size_t lastLLSize) +{ + ZSTD_memcpy(seqStorePtr->lit, anchor, lastLLSize); + seqStorePtr->lit += lastLLSize; +} + +void ZSTD_resetSeqStore(seqStore_t* ssPtr) +{ + ssPtr->lit = ssPtr->litStart; + ssPtr->sequences = ssPtr->sequencesStart; + ssPtr->longLengthID = 0; +} + +typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; + +static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) +{ + ZSTD_matchState_t* const ms = &zc->blockState.matchState; + DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + /* Assert that we have correctly flushed the ctx params into the ms's copy */ + ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); + if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + if (zc->appliedParams.cParams.strategy >= ZSTD_btopt) { + ZSTD_ldm_skipRawSeqStoreBytes(&zc->externSeqStore, srcSize); + } else { + ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); + } + return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ + } + ZSTD_resetSeqStore(&(zc->seqStore)); + /* required for optimal parser to read stats from dictionary */ + ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; + /* tell the optimal parser how we expect to compress literals */ + ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; + /* a gap between an attached dict and the current window is not safe, + * they must remain adjacent, + * and when that stops being the case, the dict must be unset */ + assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit); + + /* limited update after a very long match */ + { const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const U32 curr = (U32)(istart-base); + if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */ + if (curr > ms->nextToUpdate + 384) + ms->nextToUpdate = curr - MIN(192, (U32)(curr - ms->nextToUpdate - 384)); + } + + /* select and store sequences */ + { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms); + size_t lastLLSize; + { int i; + for (i = 0; i < ZSTD_REP_NUM; ++i) + zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; + } + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&zc->externSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = kNullRawSeqStore; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize), ""); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + src, srcSize); + assert(ldmSeqStore.pos == ldmSeqStore.size); + } else { /* not long range mode */ + ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode); + ms->ldmSeqStore = NULL; + lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize); + } + { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; + ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); + } } + return ZSTDbss_compress; +} + +static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc) +{ + const seqStore_t* seqStore = ZSTD_getSeqStore(zc); + const seqDef* seqStoreSeqs = seqStore->sequencesStart; + size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs; + size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart); + size_t literalsRead = 0; + size_t lastLLSize; + + ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex]; + size_t i; + repcodes_t updatedRepcodes; + + assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences); + /* Ensure we have enough space for last literals "sequence" */ + assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1); + ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (i = 0; i < seqStoreSeqSize; ++i) { + U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM; + outSeqs[i].litLength = seqStoreSeqs[i].litLength; + outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH; + outSeqs[i].rep = 0; + + if (i == seqStore->longLengthPos) { + if (seqStore->longLengthID == 1) { + outSeqs[i].litLength += 0x10000; + } else if (seqStore->longLengthID == 2) { + outSeqs[i].matchLength += 0x10000; + } + } + + if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) { + /* Derive the correct offset corresponding to a repcode */ + outSeqs[i].rep = seqStoreSeqs[i].offset; + if (outSeqs[i].litLength != 0) { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1]; + } else { + if (outSeqs[i].rep == 3) { + rawOffset = updatedRepcodes.rep[0] - 1; + } else { + rawOffset = updatedRepcodes.rep[outSeqs[i].rep]; + } + } + } + outSeqs[i].offset = rawOffset; + /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode + so we provide seqStoreSeqs[i].offset - 1 */ + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, + seqStoreSeqs[i].offset - 1, + seqStoreSeqs[i].litLength == 0); + literalsRead += outSeqs[i].litLength; + } + /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0. + * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker + * for the block boundary, according to the API. + */ + assert(seqStoreLiteralsSize >= literalsRead); + lastLLSize = seqStoreLiteralsSize - literalsRead; + outSeqs[i].litLength = (U32)lastLLSize; + outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0; + seqStoreSeqSize++; + zc->seqCollector.seqIndex += seqStoreSeqSize; +} + +size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs, + size_t outSeqsSize, const void* src, size_t srcSize) +{ + const size_t dstCapacity = ZSTD_compressBound(srcSize); + void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem); + SeqCollector seqCollector; + + RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!"); + + seqCollector.collectSequences = 1; + seqCollector.seqStart = outSeqs; + seqCollector.seqIndex = 0; + seqCollector.maxSequences = outSeqsSize; + zc->seqCollector = seqCollector; + + ZSTD_compress2(zc, dst, dstCapacity, src, srcSize); + ZSTD_customFree(dst, ZSTD_defaultCMem); + return zc->seqCollector.seqIndex; +} + +size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize) { + size_t in = 0; + size_t out = 0; + for (; in < seqsSize; ++in) { + if (sequences[in].offset == 0 && sequences[in].matchLength == 0) { + if (in != seqsSize - 1) { + sequences[in+1].litLength += sequences[in].litLength; + } + } else { + sequences[out] = sequences[in]; + ++out; + } + } + return out; +} + +/* Unrolled loop to read four size_ts of input at a time. Returns 1 if is RLE, 0 if not. */ +static int ZSTD_isRLE(const BYTE* src, size_t length) { + const BYTE* ip = src; + const BYTE value = ip[0]; + const size_t valueST = (size_t)((U64)value * 0x0101010101010101ULL); + const size_t unrollSize = sizeof(size_t) * 4; + const size_t unrollMask = unrollSize - 1; + const size_t prefixLength = length & unrollMask; + size_t i; + size_t u; + if (length == 1) return 1; + /* Check if prefix is RLE first before using unrolled loop */ + if (prefixLength && ZSTD_count(ip+1, ip, ip+prefixLength) != prefixLength-1) { + return 0; + } + for (i = prefixLength; i != length; i += unrollSize) { + for (u = 0; u < unrollSize; u += sizeof(size_t)) { + if (MEM_readST(ip + i + u) != valueST) { + return 0; + } + } + } + return 1; +} + +/* Returns true if the given block may be RLE. + * This is just a heuristic based on the compressibility. + * It may return both false positives and false negatives. + */ +static int ZSTD_maybeRLE(seqStore_t const* seqStore) +{ + size_t const nbSeqs = (size_t)(seqStore->sequences - seqStore->sequencesStart); + size_t const nbLits = (size_t)(seqStore->lit - seqStore->litStart); + + return nbSeqs < 4 && nbLits < 10; +} + +static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc) +{ + ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock; + zc->blockState.prevCBlock = zc->blockState.nextCBlock; + zc->blockState.nextCBlock = tmp; +} + +static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, U32 frame) +{ + /* This the upper bound for the length of an rle block. + * This isn't the actual upper bound. Finding the real threshold + * needs further investigation. + */ + const U32 rleMaxLength = 25; + size_t cSize; + const BYTE* ip = (const BYTE*)src; + BYTE* op = (BYTE*)dst; + DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, + (unsigned)zc->blockState.matchState.nextToUpdate); + + { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } + } + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + ZSTD_confirmRepcodesAndEntropyTables(zc); + return 0; + } + + /* encode sequences and literals */ + cSize = ZSTD_entropyCompressSequences(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + dst, dstCapacity, + srcSize, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + zc->bmi2); + + if (zc->seqCollector.collectSequences) { + ZSTD_copyBlockSequences(zc); + return 0; + } + + + if (frame && + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + cSize < rleMaxLength && + ZSTD_isRLE(ip, srcSize)) + { + cSize = 1; + op[0] = ip[0]; + } + +out: + if (!ZSTD_isError(cSize) && cSize > 1) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + } + /* We check that dictionaries have offset codes available for the first + * block. After the first block, the offcode table might not have large + * enough codes to represent the offsets in the data. + */ + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const size_t bss, U32 lastBlock) +{ + DEBUGLOG(6, "Attempting ZSTD_compressSuperBlock()"); + if (bss == ZSTDbss_compress) { + if (/* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + !zc->isFirstBlock && + ZSTD_maybeRLE(&zc->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) + { + return ZSTD_rleCompressBlock(dst, dstCapacity, *(BYTE const*)src, srcSize, lastBlock); + } + /* Attempt superblock compression. + * + * Note that compressed size of ZSTD_compressSuperBlock() is not bound by the + * standard ZSTD_compressBound(). This is a problem, because even if we have + * space now, taking an extra byte now could cause us to run out of space later + * and violate ZSTD_compressBound(). + * + * Define blockBound(blockSize) = blockSize + ZSTD_blockHeaderSize. + * + * In order to respect ZSTD_compressBound() we must attempt to emit a raw + * uncompressed block in these cases: + * * cSize == 0: Return code for an uncompressed block. + * * cSize == dstSize_tooSmall: We may have expanded beyond blockBound(srcSize). + * ZSTD_noCompressBlock() will return dstSize_tooSmall if we are really out of + * output space. + * * cSize >= blockBound(srcSize): We have expanded the block too much so + * emit an uncompressed block. + */ + { + size_t const cSize = ZSTD_compressSuperBlock(zc, dst, dstCapacity, src, srcSize, lastBlock); + if (cSize != ERROR(dstSize_tooSmall)) { + size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed"); + if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) { + ZSTD_confirmRepcodesAndEntropyTables(zc); + return cSize; + } + } + } + } + + DEBUGLOG(6, "Resorting to ZSTD_noCompressBlock()"); + /* Superblock compression failed, attempt to emit a single no compress block. + * The decoder will be able to stream this block since it is uncompressed. + */ + return ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock); +} + +static size_t ZSTD_compressBlock_targetCBlockSize(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastBlock) +{ + size_t cSize = 0; + const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); + DEBUGLOG(5, "ZSTD_compressBlock_targetCBlockSize (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u, srcSize=%zu)", + (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, (unsigned)zc->blockState.matchState.nextToUpdate, srcSize); + FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed"); + + cSize = ZSTD_compressBlock_targetCBlockSize_body(zc, dst, dstCapacity, src, srcSize, bss, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize_body failed"); + + if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + return cSize; +} + +static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + void const* ip, + void const* iend) +{ + if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { + U32 const maxDist = (U32)1 << params->cParams.windowLog; + U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); + U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); + ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); + ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); + ZSTD_cwksp_mark_tables_dirty(ws); + ZSTD_reduceIndex(ms, params, correction); + ZSTD_cwksp_mark_tables_clean(ws); + if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; + else ms->nextToUpdate -= correction; + /* invalidate dictionaries on overflow correction */ + ms->loadedDictEnd = 0; + ms->dictMatchState = NULL; + } +} + +/*! ZSTD_compress_frameChunk() : +* Compress a chunk of data into one or multiple blocks. +* All blocks will be terminated, all input will be consumed. +* Function will issue an error if there is not enough `dstCapacity` to hold the compressed content. +* Frame is supposed already started (header already produced) +* @return : compressed size, or an error code +*/ +static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 lastFrameChunk) +{ + size_t blockSize = cctx->blockSize; + size_t remaining = srcSize; + const BYTE* ip = (const BYTE*)src; + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; + + assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); + + DEBUGLOG(4, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); + if (cctx->appliedParams.fParams.checksumFlag && srcSize) + xxh64_update(&cctx->xxhState, src, srcSize); + + while (remaining) { + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); + + RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, + dstSize_tooSmall, + "not enough space to store compressed block"); + if (remaining < blockSize) blockSize = remaining; + + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize); + ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); + + /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ + if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; + + { size_t cSize; + if (ZSTD_useTargetCBlockSize(&cctx->appliedParams)) { + cSize = ZSTD_compressBlock_targetCBlockSize(cctx, op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed"); + assert(cSize > 0); + assert(cSize <= blockSize + ZSTD_blockHeaderSize); + } else { + cSize = ZSTD_compressBlock_internal(cctx, + op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, + ip, blockSize, 1 /* frame */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_internal failed"); + + if (cSize == 0) { /* block is not compressible */ + cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + } else { + U32 const cBlockHeader = cSize == 1 ? + lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) : + lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(op, cBlockHeader); + cSize += ZSTD_blockHeaderSize; + } + } + + + ip += blockSize; + assert(remaining >= blockSize); + remaining -= blockSize; + op += cSize; + assert(dstCapacity >= cSize); + dstCapacity -= cSize; + cctx->isFirstBlock = 0; + DEBUGLOG(5, "ZSTD_compress_frameChunk: adding a block of size %u", + (unsigned)cSize); + } } + + if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; + return (size_t)(op-ostart); +} + + +static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, U32 dictID) +{ BYTE* const op = (BYTE*)dst; + U32 const dictIDSizeCodeLength = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */ + U32 const dictIDSizeCode = params->fParams.noDictIDFlag ? 0 : dictIDSizeCodeLength; /* 0-3 */ + U32 const checksumFlag = params->fParams.checksumFlag>0; + U32 const windowSize = (U32)1 << params->cParams.windowLog; + U32 const singleSegment = params->fParams.contentSizeFlag && (windowSize >= pledgedSrcSize); + BYTE const windowLogByte = (BYTE)((params->cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); + U32 const fcsCode = params->fParams.contentSizeFlag ? + (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ + BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); + size_t pos=0; + + assert(!(params->fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); + RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall, + "dst buf is too small to fit worst-case frame header size."); + DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", + !params->fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); + if (params->format == ZSTD_f_zstd1) { + MEM_writeLE32(dst, ZSTD_MAGICNUMBER); + pos = 4; + } + op[pos++] = frameHeaderDescriptionByte; + if (!singleSegment) op[pos++] = windowLogByte; + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : op[pos] = (BYTE)(dictID); pos++; break; + case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break; + case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break; + } + switch(fcsCode) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break; + case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break; + case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break; + case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break; + } + return pos; +} + +/* ZSTD_writeSkippableFrame_advanced() : + * Writes out a skippable frame with the specified magic number variant (16 are supported), + * from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15, and the desired source data. + * + * Returns the total number of bytes written, or a ZSTD error code. + */ +size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, unsigned magicVariant) { + BYTE* op = (BYTE*)dst; + RETURN_ERROR_IF(dstCapacity < srcSize + ZSTD_SKIPPABLEHEADERSIZE /* Skippable frame overhead */, + dstSize_tooSmall, "Not enough room for skippable frame"); + RETURN_ERROR_IF(srcSize > (unsigned)0xFFFFFFFF, srcSize_wrong, "Src size too large for skippable frame"); + RETURN_ERROR_IF(magicVariant > 15, parameter_outOfBound, "Skippable frame magic number variant not supported"); + + MEM_writeLE32(op, (U32)(ZSTD_MAGIC_SKIPPABLE_START + magicVariant)); + MEM_writeLE32(op+4, (U32)srcSize); + ZSTD_memcpy(op+8, src, srcSize); + return srcSize + ZSTD_SKIPPABLEHEADERSIZE; +} + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small (stage != ZSTDcs_init, stage_wrong, + "wrong cctx stage"); + RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, + parameter_unsupported, + "incompatible with ldm"); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + cctx->externSeqStore.posInSequence = 0; + return 0; +} + + +static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + U32 frame, U32 lastFrameChunk) +{ + ZSTD_matchState_t* const ms = &cctx->blockState.matchState; + size_t fhSize = 0; + + DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", + cctx->stage, (unsigned)srcSize); + RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, + "missing init (ZSTD_compressBegin)"); + + if (frame && (cctx->stage==ZSTDcs_init)) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, + cctx->pledgedSrcSizePlusOne-1, cctx->dictID); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + assert(fhSize <= dstCapacity); + dstCapacity -= fhSize; + dst = (char*)dst + fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (!srcSize) return fhSize; /* do not generate an empty block if no input */ + + if (!ZSTD_window_update(&ms->window, src, srcSize)) { + ms->nextToUpdate = ms->window.dictLimit; + } + if (cctx->appliedParams.ldmParams.enableLdm) { + ZSTD_window_update(&cctx->ldmState.window, src, srcSize); + } + + if (!frame) { + /* overflow check and correction for block mode */ + ZSTD_overflowCorrectIfNeeded( + ms, &cctx->workspace, &cctx->appliedParams, + src, (BYTE const*)src + srcSize); + } + + DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); + { size_t const cSize = frame ? + ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : + ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize, 0 /* frame */); + FORWARD_IF_ERROR(cSize, "%s", frame ? "ZSTD_compress_frameChunk failed" : "ZSTD_compressBlock_internal failed"); + cctx->consumedSrcSize += srcSize; + cctx->producedCSize += (cSize + fhSize); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + RETURN_ERROR_IF( + cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize >= %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + return cSize + fhSize; + } +} + +size_t ZSTD_compressContinue (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressContinue (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1 /* frame mode */, 0 /* last chunk */); +} + + +size_t ZSTD_getBlockSize(const ZSTD_CCtx* cctx) +{ + ZSTD_compressionParameters const cParams = cctx->appliedParams.cParams; + assert(!ZSTD_checkCParams(cParams)); + return MIN (ZSTD_BLOCKSIZE_MAX, (U32)1 << cParams.windowLog); +} + +size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); + { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); + RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong, "input is larger than a block"); } + + return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); +} + +/*! ZSTD_loadDictionaryContent() : + * @return : 0, or an error code + */ +static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* src, size_t srcSize, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const BYTE* ip = (const BYTE*) src; + const BYTE* const iend = ip + srcSize; + + ZSTD_window_update(&ms->window, src, srcSize); + ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base); + + if (params->ldmParams.enableLdm && ls != NULL) { + ZSTD_window_update(&ls->window, src, srcSize); + ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base); + } + + /* Assert that we the ms params match the params we're being given */ + ZSTD_assertEqualCParams(params->cParams, ms->cParams); + + if (srcSize <= HASH_READ_SIZE) return 0; + + while (iend - ip > HASH_READ_SIZE) { + size_t const remaining = (size_t)(iend - ip); + size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); + const BYTE* const ichunk = ip + chunk; + + ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk); + + if (params->ldmParams.enableLdm && ls != NULL) + ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, ¶ms->ldmParams); + + switch(params->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, ichunk, dtlm); + break; + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) { + assert(chunk == remaining); /* must load everything in one go */ + ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE); + } else if (chunk >= HASH_READ_SIZE) { + ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); + } + break; + + case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + if (chunk >= HASH_READ_SIZE) + ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); + break; + + default: + assert(0); /* not possible : not a valid strategy id */ + } + + ip = ichunk; + } + + ms->nextToUpdate = (U32)(iend - ms->window.base); + return 0; +} + + +/* Dictionaries that assign zero probability to symbols that show up causes problems + * when FSE encoding. Mark dictionaries with zero probability symbols as FSE_repeat_check + * and only dictionaries with 100% valid symbols can be assumed valid. + */ +static FSE_repeat ZSTD_dictNCountRepeat(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) +{ + U32 s; + if (dictMaxSymbolValue < maxSymbolValue) { + return FSE_repeat_check; + } + for (s = 0; s <= maxSymbolValue; ++s) { + if (normalizedCounter[s] == 0) { + return FSE_repeat_check; + } + } + return FSE_repeat_valid; +} + +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize) +{ + short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff; + const BYTE* dictPtr = (const BYTE*)dict; /* skip magic num and dict ID */ + const BYTE* const dictEnd = dictPtr + dictSize; + dictPtr += 8; + bs->entropy.huf.repeatMode = HUF_repeat_check; + + { unsigned maxSymbolValue = 255; + unsigned hasZeroWeights = 1; + size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, + dictEnd-dictPtr, &hasZeroWeights); + + /* We only set the loaded table as valid if it contains all non-zero + * weights. Otherwise, we set it to check */ + if (!hasZeroWeights) + bs->entropy.huf.repeatMode = HUF_repeat_valid; + + RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, ""); + dictPtr += hufHeaderSize; + } + + { unsigned offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + /* fill all offset symbols to avoid garbage at end of table */ + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.offcodeCTable, + offcodeNCount, MaxOff, offcodeLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.matchlengthCTable, + matchlengthNCount, matchlengthMaxValue, matchlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.matchlength_repeatMode = ZSTD_dictNCountRepeat(matchlengthNCount, matchlengthMaxValue, MaxML); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( + bs->entropy.fse.litlengthCTable, + litlengthNCount, litlengthMaxValue, litlengthLog, + workspace, HUF_WORKSPACE_SIZE)), + dictionary_corrupted, ""); + bs->entropy.fse.litlength_repeatMode = ZSTD_dictNCountRepeat(litlengthNCount, litlengthMaxValue, MaxLL); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + bs->rep[0] = MEM_readLE32(dictPtr+0); + bs->rep[1] = MEM_readLE32(dictPtr+4); + bs->rep[2] = MEM_readLE32(dictPtr+8); + dictPtr += 12; + + { size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + U32 offcodeMax = MaxOff; + if (dictContentSize <= ((U32)-1) - 128 KB) { + U32 const maxOffset = (U32)dictContentSize + 128 KB; /* The maximum offset that must be supported */ + offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ + } + /* All offset values <= dictContentSize + 128 KB must be representable for a valid table */ + bs->entropy.fse.offcode_repeatMode = ZSTD_dictNCountRepeat(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)); + + /* All repCodes must be <= dictContentSize and != 0 */ + { U32 u; + for (u=0; u<3; u++) { + RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted, ""); + RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted, ""); + } } } + + return dictPtr - (const BYTE*)dict; +} + +/* Dictionary format : + * See : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#dictionary-format + */ +/*! ZSTD_loadZstdDictionary() : + * @return : dictID, or an error code + * assumptions : magic number supposed already checked + * dictSize supposed >= 8 + */ +static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ZSTD_cwksp* ws, + ZSTD_CCtx_params const* params, + const void* dict, size_t dictSize, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + size_t dictID; + size_t eSize; + + ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<= 8); + assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY); + + dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr + 4 /* skip magic number */ ); + eSize = ZSTD_loadCEntropy(bs, workspace, dict, dictSize); + FORWARD_IF_ERROR(eSize, "ZSTD_loadCEntropy failed"); + dictPtr += eSize; + + { + size_t const dictContentSize = (size_t)(dictEnd - dictPtr); + FORWARD_IF_ERROR(ZSTD_loadDictionaryContent( + ms, NULL, ws, params, dictPtr, dictContentSize, dtlm), ""); + } + return dictID; +} + +/** ZSTD_compress_insertDictionary() : +* @return : dictID, or an error code */ +static size_t +ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, + ZSTD_matchState_t* ms, + ldmState_t* ls, + ZSTD_cwksp* ws, + const ZSTD_CCtx_params* params, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + void* workspace) +{ + DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize); + if ((dict==NULL) || (dictSize<8)) { + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + return 0; + } + + ZSTD_reset_compressedBlockState(bs); + + /* dict restricted modes */ + if (dictContentType == ZSTD_dct_rawContent) + return ZSTD_loadDictionaryContent(ms, ls, ws, params, dict, dictSize, dtlm); + + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_auto) { + DEBUGLOG(4, "raw content dictionary detected"); + return ZSTD_loadDictionaryContent( + ms, ls, ws, params, dict, dictSize, dtlm); + } + RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong, ""); + assert(0); /* impossible */ + } + + /* dict as full zstd dictionary */ + return ZSTD_loadZstdDictionary( + bs, ms, ws, params, dict, dictSize, dtlm, workspace); +} + +#define ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF (128 KB) +#define ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER (6ULL) + +/*! ZSTD_compressBegin_internal() : + * @return : 0, or an error code */ +static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, U64 pledgedSrcSize, + ZSTD_buffered_policy_e zbuff) +{ + DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog); + /* params are supposed to be fully validated at this point */ + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if ( (cdict) + && (cdict->dictContentSize > 0) + && ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0) + && (params->attachDictPref != ZSTD_dictForceLoad) ) { + return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); + } + + FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize, + ZSTDcrp_makeClean, zbuff) , ""); + { size_t const dictID = cdict ? + ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, cdict->dictContent, + cdict->dictContentSize, cdict->dictContentType, dtlm, + cctx->entropyWorkspace) + : ZSTD_compress_insertDictionary( + cctx->blockState.prevCBlock, &cctx->blockState.matchState, + &cctx->ldmState, &cctx->workspace, &cctx->appliedParams, dict, dictSize, + dictContentType, dtlm, cctx->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= UINT_MAX); + cctx->dictID = (U32)dictID; + cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize; + } + return 0; +} + +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params->cParams.windowLog); + /* compression parameters verification and optimization */ + FORWARD_IF_ERROR( ZSTD_checkCParams(params->cParams) , ""); + return ZSTD_compressBegin_internal(cctx, + dict, dictSize, dictContentType, dtlm, + cdict, + params, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/*! ZSTD_compressBegin_advanced() : +* @return : 0, or an error code */ +size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compressBegin_advanced_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, + NULL /*cdict*/, + &cctxParams, pledgedSrcSize); +} + +size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_noAttachDict); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel); + } + DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (unsigned)dictSize); + return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered); +} + +size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel) +{ + return ZSTD_compressBegin_usingDict(cctx, NULL, 0, compressionLevel); +} + + +/*! ZSTD_writeEpilogue() : +* Ends a frame. +* @return : nb of bytes written into dst (or an error code) */ +static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* op = ostart; + size_t fhSize = 0; + + DEBUGLOG(4, "ZSTD_writeEpilogue"); + RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); + + /* special case : empty frame */ + if (cctx->stage == ZSTDcs_init) { + fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0); + FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed"); + dstCapacity -= fhSize; + op += fhSize; + cctx->stage = ZSTDcs_ongoing; + } + + if (cctx->stage != ZSTDcs_ending) { + /* write one last empty block, make it the "last" block */ + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + } + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) xxh64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); + MEM_writeLE32(op, checksum); + op += 4; + } + + cctx->stage = ZSTDcs_created; /* return to "created but no init" status */ + return op-ostart; +} + +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize) +{ + (void)cctx; + (void)extraCSize; +} + +size_t ZSTD_compressEnd (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t endResult; + size_t const cSize = ZSTD_compressContinue_internal(cctx, + dst, dstCapacity, src, srcSize, + 1 /* frame mode */, 1 /* last chunk */); + FORWARD_IF_ERROR(cSize, "ZSTD_compressContinue_internal failed"); + endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); + FORWARD_IF_ERROR(endResult, "ZSTD_writeEpilogue failed"); + assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); + if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); + DEBUGLOG(4, "end of frame : controlling src size"); + RETURN_ERROR_IF( + cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, + srcSize_wrong, + "error : pledgedSrcSize = %u, while realSrcSize = %u", + (unsigned)cctx->pledgedSrcSizePlusOne-1, + (unsigned)cctx->consumedSrcSize); + } + ZSTD_CCtx_trace(cctx, endResult); + return cSize + endResult; +} + +size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + ZSTD_parameters params) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compress_advanced"); + FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), ""); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, ZSTD_NO_CLEVEL); + return ZSTD_compress_advanced_internal(cctx, + dst, dstCapacity, + src, srcSize, + dict, dictSize, + &cctxParams); +} + +/* Internal */ +size_t ZSTD_compress_advanced_internal( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params) +{ + DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, + params, srcSize, ZSTDb_not_buffered) , ""); + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + int compressionLevel) +{ + ZSTD_CCtx_params cctxParams; + { + ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict); + assert(params.fParams.contentSizeFlag == 1); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel); + } + DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize); + return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams); +} + +size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (unsigned)srcSize); + assert(cctx != NULL); + return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel); +} + +size_t ZSTD_compress(void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + int compressionLevel) +{ + size_t result; + ZSTD_CCtx* cctx = ZSTD_createCCtx(); + RETURN_ERROR_IF(!cctx, memory_allocation, "ZSTD_createCCtx failed"); + result = ZSTD_compressCCtx(cctx, dst, dstCapacity, src, srcSize, compressionLevel); + ZSTD_freeCCtx(cctx); + return result; +} + + +/* ===== Dictionary API ===== */ + +/*! ZSTD_estimateCDictSize_advanced() : + * Estimate amount of memory that will be needed to create a dictionary with following arguments */ +size_t ZSTD_estimateCDictSize_advanced( + size_t dictSize, ZSTD_compressionParameters cParams, + ZSTD_dictLoadMethod_e dictLoadMethod) +{ + DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict)); + return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *)))); +} + +size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + return ZSTD_estimateCDictSize_advanced(dictSize, cParams, ZSTD_dlm_byCopy); +} + +size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support sizeof on NULL */ + DEBUGLOG(5, "sizeof(*cdict) : %u", (unsigned)sizeof(*cdict)); + /* cdict may be in the workspace */ + return (cdict->workspace.workspace == cdict ? 0 : sizeof(*cdict)) + + ZSTD_cwksp_sizeof(&cdict->workspace); +} + +static size_t ZSTD_initCDict_internal( + ZSTD_CDict* cdict, + const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_CCtx_params params) +{ + DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (unsigned)dictContentType); + assert(!ZSTD_checkCParams(params.cParams)); + cdict->matchState.cParams = params.cParams; + cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch; + if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) { + cdict->matchState.dedicatedDictSearch = 0; + } + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) { + cdict->dictContent = dictBuffer; + } else { + void *internalBuffer = ZSTD_cwksp_reserve_object(&cdict->workspace, ZSTD_cwksp_align(dictSize, sizeof(void*))); + RETURN_ERROR_IF(!internalBuffer, memory_allocation, "NULL pointer!"); + cdict->dictContent = internalBuffer; + ZSTD_memcpy(internalBuffer, dictBuffer, dictSize); + } + cdict->dictContentSize = dictSize; + cdict->dictContentType = dictContentType; + + cdict->entropyWorkspace = (U32*)ZSTD_cwksp_reserve_object(&cdict->workspace, HUF_WORKSPACE_SIZE); + + + /* Reset the state to no dictionary */ + ZSTD_reset_compressedBlockState(&cdict->cBlockState); + FORWARD_IF_ERROR(ZSTD_reset_matchState( + &cdict->matchState, + &cdict->workspace, + ¶ms.cParams, + ZSTDcrp_makeClean, + ZSTDirp_reset, + ZSTD_resetTarget_CDict), ""); + /* (Maybe) load the dictionary + * Skips loading the dictionary if it is < 8 bytes. + */ + { params.compressionLevel = ZSTD_CLEVEL_DEFAULT; + params.fParams.contentSizeFlag = 1; + { size_t const dictID = ZSTD_compress_insertDictionary( + &cdict->cBlockState, &cdict->matchState, NULL, &cdict->workspace, + ¶ms, cdict->dictContent, cdict->dictContentSize, + dictContentType, ZSTD_dtlm_full, cdict->entropyWorkspace); + FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed"); + assert(dictID <= (size_t)(U32)-1); + cdict->dictID = (U32)dictID; + } + } + + return 0; +} + +static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_compressionParameters cParams, ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { size_t const workspaceSize = + ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))); + void* const workspace = ZSTD_customMalloc(workspaceSize, customMem); + ZSTD_cwksp ws; + ZSTD_CDict* cdict; + + if (!workspace) { + ZSTD_customFree(workspace, customMem); + return NULL; + } + + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_dynamic_alloc); + + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + assert(cdict != NULL); + ZSTD_cwksp_move(&cdict->workspace, &ws); + cdict->customMem = customMem; + cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */ + + return cdict; + } +} + +ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams; + ZSTD_memset(&cctxParams, 0, sizeof(cctxParams)); + ZSTD_CCtxParams_init(&cctxParams, 0); + cctxParams.cParams = cParams; + cctxParams.customMem = customMem; + return ZSTD_createCDict_advanced2( + dictBuffer, dictSize, + dictLoadMethod, dictContentType, + &cctxParams, customMem); +} + +ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2( + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + const ZSTD_CCtx_params* originalCctxParams, + ZSTD_customMem customMem) +{ + ZSTD_CCtx_params cctxParams = *originalCctxParams; + ZSTD_compressionParameters cParams; + ZSTD_CDict* cdict; + + DEBUGLOG(3, "ZSTD_createCDict_advanced2, mode %u", (unsigned)dictContentType); + if (!customMem.customAlloc ^ !customMem.customFree) return NULL; + + if (cctxParams.enableDedicatedDictSearch) { + cParams = ZSTD_dedicatedDictSearch_getCParams( + cctxParams.compressionLevel, dictSize); + ZSTD_overrideCParams(&cParams, &cctxParams.cParams); + } else { + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + if (!ZSTD_dedicatedDictSearch_isSupported(&cParams)) { + /* Fall back to non-DDSS params */ + cctxParams.enableDedicatedDictSearch = 0; + cParams = ZSTD_getCParamsFromCCtxParams( + &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + } + + cctxParams.cParams = cParams; + + cdict = ZSTD_createCDict_advanced_internal(dictSize, + dictLoadMethod, cctxParams.cParams, + customMem); + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + cctxParams) )) { + ZSTD_freeCDict(cdict); + return NULL; + } + + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byCopy, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +ZSTD_CDict* ZSTD_createCDict_byReference(const void* dict, size_t dictSize, int compressionLevel) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict); + ZSTD_CDict* const cdict = ZSTD_createCDict_advanced(dict, dictSize, + ZSTD_dlm_byRef, ZSTD_dct_auto, + cParams, ZSTD_defaultCMem); + if (cdict) + cdict->compressionLevel = (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT : compressionLevel; + return cdict; +} + +size_t ZSTD_freeCDict(ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = cdict->customMem; + int cdictInWorkspace = ZSTD_cwksp_owns_buffer(&cdict->workspace, cdict); + ZSTD_cwksp_free(&cdict->workspace, cMem); + if (!cdictInWorkspace) { + ZSTD_customFree(cdict, cMem); + } + return 0; + } +} + +/*! ZSTD_initStaticCDict_advanced() : + * Generate a digested dictionary in provided memory area. + * workspace: The memory area to emplace the dictionary into. + * Provided pointer must 8-bytes aligned. + * It must outlive dictionary usage. + * workspaceSize: Use ZSTD_estimateCDictSize() + * to determine how large workspace must be. + * cParams : use ZSTD_getCParams() to transform a compression level + * into its relevants cParams. + * @return : pointer to ZSTD_CDict*, or NULL if error (size too small) + * Note : there is no corresponding "free" function. + * Since workspace was allocated externally, it must be freed externally. + */ +const ZSTD_CDict* ZSTD_initStaticCDict( + void* workspace, size_t workspaceSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_compressionParameters cParams) +{ + size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0); + size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 + : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*)))) + + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) + + matchStateSize; + ZSTD_CDict* cdict; + ZSTD_CCtx_params params; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + + { + ZSTD_cwksp ws; + ZSTD_cwksp_init(&ws, workspace, workspaceSize, ZSTD_cwksp_static_alloc); + cdict = (ZSTD_CDict*)ZSTD_cwksp_reserve_object(&ws, sizeof(ZSTD_CDict)); + if (cdict == NULL) return NULL; + ZSTD_cwksp_move(&cdict->workspace, &ws); + } + + DEBUGLOG(4, "(workspaceSize < neededSize) : (%u < %u) => %u", + (unsigned)workspaceSize, (unsigned)neededSize, (unsigned)(workspaceSize < neededSize)); + if (workspaceSize < neededSize) return NULL; + + ZSTD_CCtxParams_init(¶ms, 0); + params.cParams = cParams; + + if (ZSTD_isError( ZSTD_initCDict_internal(cdict, + dict, dictSize, + dictLoadMethod, dictContentType, + params) )) + return NULL; + + return cdict; +} + +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict) +{ + assert(cdict != NULL); + return cdict->matchState.cParams; +} + +/*! ZSTD_getDictID_fromCDict() : + * Provides the dictID of the dictionary loaded into `cdict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict) +{ + if (cdict==NULL) return 0; + return cdict->dictID; +} + + +/* ZSTD_compressBegin_usingCDict_advanced() : + * cdict must be != NULL */ +size_t ZSTD_compressBegin_usingCDict_advanced( + ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, + ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) +{ + ZSTD_CCtx_params cctxParams; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); + RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!"); + /* Initialize the cctxParams from the cdict */ + { + ZSTD_parameters params; + params.fParams = fParams; + params.cParams = ( pledgedSrcSize < ZSTD_USE_CDICT_PARAMS_SRCSIZE_CUTOFF + || pledgedSrcSize < cdict->dictContentSize * ZSTD_USE_CDICT_PARAMS_DICTSIZE_MULTIPLIER + || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN + || cdict->compressionLevel == 0 ) ? + ZSTD_getCParamsFromCDict(cdict) + : ZSTD_getCParams(cdict->compressionLevel, + pledgedSrcSize, + cdict->dictContentSize); + ZSTD_CCtxParams_init_internal(&cctxParams, ¶ms, cdict->compressionLevel); + } + /* Increase window log to fit the entire dictionary and source if the + * source size is known. Limit the increase to 19, which is the + * window log for compression level 1 with the largest source size. + */ + if (pledgedSrcSize != ZSTD_CONTENTSIZE_UNKNOWN) { + U32 const limitedSrcSize = (U32)MIN(pledgedSrcSize, 1U << 19); + U32 const limitedSrcLog = limitedSrcSize > 1 ? ZSTD_highbit32(limitedSrcSize - 1) + 1 : 1; + cctxParams.cParams.windowLog = MAX(cctxParams.cParams.windowLog, limitedSrcLog); + } + return ZSTD_compressBegin_internal(cctx, + NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, + cdict, + &cctxParams, pledgedSrcSize, + ZSTDb_not_buffered); +} + +/* ZSTD_compressBegin_usingCDict() : + * pledgedSrcSize=0 means "unknown" + * if pledgedSrcSize>0, it will enable contentSizeFlag */ +size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag); + return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); +} + +size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) +{ + FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */ + return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); +} + +/*! ZSTD_compress_usingCDict() : + * Compression using a digested Dictionary. + * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. + * Note that compression parameters are decided at CDict creation time + * while frame parameters are hardcoded */ +size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_CDict* cdict) +{ + ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ }; + return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams); +} + + + +/* ****************************************************************** +* Streaming +********************************************************************/ + +ZSTD_CStream* ZSTD_createCStream(void) +{ + DEBUGLOG(3, "ZSTD_createCStream"); + return ZSTD_createCStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_CStream* ZSTD_initStaticCStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticCCtx(workspace, workspaceSize); +} + +ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem) +{ /* CStream and CCtx are now same object */ + return ZSTD_createCCtx_advanced(customMem); +} + +size_t ZSTD_freeCStream(ZSTD_CStream* zcs) +{ + return ZSTD_freeCCtx(zcs); /* same object */ +} + + + +/*====== Initialization ======*/ + +size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_CStreamOutSize(void) +{ + return ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; +} + +static ZSTD_cParamMode_e ZSTD_getCParamMode(ZSTD_CDict const* cdict, ZSTD_CCtx_params const* params, U64 pledgedSrcSize) +{ + if (cdict != NULL && ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) + return ZSTD_cpm_attachDict; + else + return ZSTD_cpm_noAttachDict; +} + +/* ZSTD_resetCStream(): + * pledgedSrcSize == 0 means "unknown" */ +size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +/*! ZSTD_initCStream_internal() : + * Note : for lib/compress only. Used by zstdmt_compress.c. + * Assumption 1 : params are valid + * Assumption 2 : either dict, or cdict, is defined, not both */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_internal"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams))); + zcs->requestedParams = *params; + assert(!((dict) && (cdict))); /* either dict or cdict, not both */ + if (dict) { + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + } else { + /* Dictionary is cleared if !cdict */ + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + } + return 0; +} + +/* ZSTD_initCStream_usingCDict_advanced() : + * same as ZSTD_initCStream_usingCDict(), with control over frame parameters */ +size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, + const ZSTD_CDict* cdict, + ZSTD_frameParameters fParams, + unsigned long long pledgedSrcSize) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + zcs->requestedParams.fParams = fParams; + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + +/* note : cdict must outlive compression session */ +size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) , ""); + return 0; +} + + +/* ZSTD_initCStream_advanced() : + * pledgedSrcSize must be exact. + * if srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. + * dict is loaded with default parameters ZSTD_dct_auto and ZSTD_dlm_byCopy. */ +size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + ZSTD_parameters params, unsigned long long pss) +{ + /* for compatibility with older programs relying on this behavior. + * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. + * This line will be removed in the future. + */ + U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_advanced"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) , ""); + ZSTD_CCtxParams_setZstdParams(&zcs->requestedParams, ¶ms); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) , ""); + return 0; +} + +size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) +{ + /* temporary : 0 interpreted as "unknown" during transition period. + * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. + * 0 will be interpreted as "empty" in the future. + */ + U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; + DEBUGLOG(4, "ZSTD_initCStream_srcSize"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) , ""); + return 0; +} + +size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) +{ + DEBUGLOG(4, "ZSTD_initCStream"); + FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) , ""); + FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) , ""); + return 0; +} + +/*====== Compression ======*/ + +static size_t ZSTD_nextInputSizeHint(const ZSTD_CCtx* cctx) +{ + size_t hintInSize = cctx->inBuffTarget - cctx->inBuffPos; + if (hintInSize==0) hintInSize = cctx->blockSize; + return hintInSize; +} + +/** ZSTD_compressStream_generic(): + * internal function for all *compressStream*() variants + * non-static, because can be called from zstdmt_compress.c + * @return : hint size for next input */ +static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective const flushMode) +{ + const char* const istart = (const char*)input->src; + const char* const iend = input->size != 0 ? istart + input->size : istart; + const char* ip = input->pos != 0 ? istart + input->pos : istart; + char* const ostart = (char*)output->dst; + char* const oend = output->size != 0 ? ostart + output->size : ostart; + char* op = output->pos != 0 ? ostart + output->pos : ostart; + U32 someMoreWork = 1; + + /* check expectations */ + DEBUGLOG(5, "ZSTD_compressStream_generic, flush=%u", (unsigned)flushMode); + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + assert(zcs->inBuff != NULL); + assert(zcs->inBuffSize > 0); + } + if (zcs->appliedParams.outBufferMode == ZSTD_bm_buffered) { + assert(zcs->outBuff != NULL); + assert(zcs->outBuffSize > 0); + } + assert(output->pos <= output->size); + assert(input->pos <= input->size); + assert((U32)flushMode <= (U32)ZSTD_e_end); + + while (someMoreWork) { + switch(zcs->streamStage) + { + case zcss_init: + RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); + + case zcss_load: + if ( (flushMode == ZSTD_e_end) + && ( (size_t)(oend-op) >= ZSTD_compressBound(iend-ip) /* Enough output space */ + || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) /* OR we are allowed to return dstSizeTooSmall */ + && (zcs->inBuffPos == 0) ) { + /* shortcut to compression pass directly into output buffer */ + size_t const cSize = ZSTD_compressEnd(zcs, + op, oend-op, ip, iend-ip); + DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); + FORWARD_IF_ERROR(cSize, "ZSTD_compressEnd failed"); + ip = iend; + op += cSize; + zcs->frameEnded = 1; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + someMoreWork = 0; break; + } + /* complete loading into inBuffer in buffered mode */ + if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) { + size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos; + size_t const loaded = ZSTD_limitCopy( + zcs->inBuff + zcs->inBuffPos, toLoad, + ip, iend-ip); + zcs->inBuffPos += loaded; + if (loaded != 0) + ip += loaded; + if ( (flushMode == ZSTD_e_continue) + && (zcs->inBuffPos < zcs->inBuffTarget) ) { + /* not enough input to fill full block : stop here */ + someMoreWork = 0; break; + } + if ( (flushMode == ZSTD_e_flush) + && (zcs->inBuffPos == zcs->inToCompress) ) { + /* empty */ + someMoreWork = 0; break; + } + } + /* compress current block (note : this stage cannot be stopped in the middle) */ + DEBUGLOG(5, "stream compression stage (flushMode==%u)", flushMode); + { int const inputBuffered = (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered); + void* cDst; + size_t cSize; + size_t oSize = oend-op; + size_t const iSize = inputBuffered + ? zcs->inBuffPos - zcs->inToCompress + : MIN((size_t)(iend - ip), zcs->blockSize); + if (oSize >= ZSTD_compressBound(iSize) || zcs->appliedParams.outBufferMode == ZSTD_bm_stable) + cDst = op; /* compress into output buffer, to skip flush stage */ + else + cDst = zcs->outBuff, oSize = zcs->outBuffSize; + if (inputBuffered) { + unsigned const lastBlock = (flushMode == ZSTD_e_end) && (ip==iend); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, + zcs->inBuff + zcs->inToCompress, iSize); + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + /* prepare next block */ + zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; + if (zcs->inBuffTarget > zcs->inBuffSize) + zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; + DEBUGLOG(5, "inBuffTarget:%u / inBuffSize:%u", + (unsigned)zcs->inBuffTarget, (unsigned)zcs->inBuffSize); + if (!lastBlock) + assert(zcs->inBuffTarget <= zcs->inBuffSize); + zcs->inToCompress = zcs->inBuffPos; + } else { + unsigned const lastBlock = (ip + iSize == iend); + assert(flushMode == ZSTD_e_end /* Already validated */); + cSize = lastBlock ? + ZSTD_compressEnd(zcs, cDst, oSize, ip, iSize) : + ZSTD_compressContinue(zcs, cDst, oSize, ip, iSize); + /* Consume the input prior to error checking to mirror buffered mode. */ + if (iSize > 0) + ip += iSize; + FORWARD_IF_ERROR(cSize, "%s", lastBlock ? "ZSTD_compressEnd failed" : "ZSTD_compressContinue failed"); + zcs->frameEnded = lastBlock; + if (lastBlock) + assert(ip == iend); + } + if (cDst == op) { /* no need to flush */ + op += cSize; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed directly in outBuffer"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + } + break; + } + zcs->outBuffContentSize = cSize; + zcs->outBuffFlushedSize = 0; + zcs->streamStage = zcss_flush; /* pass-through to flush stage */ + } + /* fall-through */ + case zcss_flush: + DEBUGLOG(5, "flush stage"); + assert(zcs->appliedParams.outBufferMode == ZSTD_bm_buffered); + { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; + size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), + zcs->outBuff + zcs->outBuffFlushedSize, toFlush); + DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", + (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); + if (flushed) + op += flushed; + zcs->outBuffFlushedSize += flushed; + if (toFlush!=flushed) { + /* flush not fully completed, presumably because dst is too small */ + assert(op==oend); + someMoreWork = 0; + break; + } + zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; + if (zcs->frameEnded) { + DEBUGLOG(5, "Frame completed on flush"); + someMoreWork = 0; + ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); + break; + } + zcs->streamStage = zcss_load; + break; + } + + default: /* impossible */ + assert(0); + } + } + + input->pos = ip - istart; + output->pos = op - ostart; + if (zcs->frameEnded) return 0; + return ZSTD_nextInputSizeHint(zcs); +} + +static size_t ZSTD_nextInputSizeHint_MTorST(const ZSTD_CCtx* cctx) +{ + return ZSTD_nextInputSizeHint(cctx); + +} + +size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) , ""); + return ZSTD_nextInputSizeHint_MTorST(zcs); +} + +/* After a compression call set the expected input/output buffer. + * This is validated at the start of the next compression call. + */ +static void ZSTD_setBufferExpectations(ZSTD_CCtx* cctx, ZSTD_outBuffer const* output, ZSTD_inBuffer const* input) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + cctx->expectedInBuffer = *input; + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + cctx->expectedOutBufferSize = output->size - output->pos; + } +} + +/* Validate that the input/output buffers match the expectations set by + * ZSTD_setBufferExpectations. + */ +static size_t ZSTD_checkBufferStability(ZSTD_CCtx const* cctx, + ZSTD_outBuffer const* output, + ZSTD_inBuffer const* input, + ZSTD_EndDirective endOp) +{ + if (cctx->appliedParams.inBufferMode == ZSTD_bm_stable) { + ZSTD_inBuffer const expect = cctx->expectedInBuffer; + if (expect.src != input->src || expect.pos != input->pos || expect.size != input->size) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer enabled but input differs!"); + if (endOp != ZSTD_e_end) + RETURN_ERROR(srcBuffer_wrong, "ZSTD_c_stableInBuffer can only be used with ZSTD_e_end!"); + } + if (cctx->appliedParams.outBufferMode == ZSTD_bm_stable) { + size_t const outBufferSize = output->size - output->pos; + if (cctx->expectedOutBufferSize != outBufferSize) + RETURN_ERROR(dstBuffer_wrong, "ZSTD_c_stableOutBuffer enabled but output size differs!"); + } + return 0; +} + +static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx, + ZSTD_EndDirective endOp, + size_t inSize) { + ZSTD_CCtx_params params = cctx->requestedParams; + ZSTD_prefixDict const prefixDict = cctx->prefixDict; + FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */ + ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ + assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ + if (cctx->cdict) + params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */ + DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); + if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1; /* auto-fix pledgedSrcSize */ + { + size_t const dictSize = prefixDict.dict + ? prefixDict.dictSize + : (cctx->cdict ? cctx->cdict->dictContentSize : 0); + ZSTD_cParamMode_e const mode = ZSTD_getCParamMode(cctx->cdict, ¶ms, cctx->pledgedSrcSizePlusOne - 1); + params.cParams = ZSTD_getCParamsFromCCtxParams( + ¶ms, cctx->pledgedSrcSizePlusOne-1, + dictSize, mode); + } + + if (ZSTD_CParams_shouldEnableLdm(¶ms.cParams)) { + /* Enable LDM by default for optimal parser and window size >= 128MB */ + DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)"); + params.ldmParams.enableLdm = 1; + } + + { U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1; + assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); + FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, + prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, ZSTD_dtlm_fast, + cctx->cdict, + ¶ms, pledgedSrcSize, + ZSTDb_buffered) , ""); + assert(cctx->appliedParams.nbWorkers == 0); + cctx->inToCompress = 0; + cctx->inBuffPos = 0; + if (cctx->appliedParams.inBufferMode == ZSTD_bm_buffered) { + /* for small input: avoid automatic flush on reaching end of block, since + * it would require to add a 3-bytes null block to end frame + */ + cctx->inBuffTarget = cctx->blockSize + (cctx->blockSize == pledgedSrcSize); + } else { + cctx->inBuffTarget = 0; + } + cctx->outBuffContentSize = cctx->outBuffFlushedSize = 0; + cctx->streamStage = zcss_load; + cctx->frameEnded = 0; + } + return 0; +} + +size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, + ZSTD_outBuffer* output, + ZSTD_inBuffer* input, + ZSTD_EndDirective endOp) +{ + DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); + /* check conditions */ + RETURN_ERROR_IF(output->pos > output->size, dstSize_tooSmall, "invalid output buffer"); + RETURN_ERROR_IF(input->pos > input->size, srcSize_wrong, "invalid input buffer"); + RETURN_ERROR_IF((U32)endOp > (U32)ZSTD_e_end, parameter_outOfBound, "invalid endDirective"); + assert(cctx != NULL); + + /* transparent initialization stage */ + if (cctx->streamStage == zcss_init) { + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, endOp, input->size), "CompressStream2 initialization failed"); + ZSTD_setBufferExpectations(cctx, output, input); /* Set initial buffer expectations now that we've initialized */ + } + /* end of transparent initialization stage */ + + FORWARD_IF_ERROR(ZSTD_checkBufferStability(cctx, output, input, endOp), "invalid buffers"); + /* compression stage */ + FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) , ""); + DEBUGLOG(5, "completed ZSTD_compressStream2"); + ZSTD_setBufferExpectations(cctx, output, input); + return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ +} + +size_t ZSTD_compressStream2_simpleArgs ( + ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos, + ZSTD_EndDirective endOp) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compressStream2() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_compressStream2(cctx, &output, &input, endOp); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} + +size_t ZSTD_compress2(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + ZSTD_bufferMode_e const originalInBufferMode = cctx->requestedParams.inBufferMode; + ZSTD_bufferMode_e const originalOutBufferMode = cctx->requestedParams.outBufferMode; + DEBUGLOG(4, "ZSTD_compress2 (srcSize=%u)", (unsigned)srcSize); + ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); + /* Enable stable input/output buffers. */ + cctx->requestedParams.inBufferMode = ZSTD_bm_stable; + cctx->requestedParams.outBufferMode = ZSTD_bm_stable; + { size_t oPos = 0; + size_t iPos = 0; + size_t const result = ZSTD_compressStream2_simpleArgs(cctx, + dst, dstCapacity, &oPos, + src, srcSize, &iPos, + ZSTD_e_end); + /* Reset to the original values. */ + cctx->requestedParams.inBufferMode = originalInBufferMode; + cctx->requestedParams.outBufferMode = originalOutBufferMode; + FORWARD_IF_ERROR(result, "ZSTD_compressStream2_simpleArgs failed"); + if (result != 0) { /* compression not completed, due to lack of output space */ + assert(oPos == dstCapacity); + RETURN_ERROR(dstSize_tooSmall, ""); + } + assert(iPos == srcSize); /* all input is expected consumed */ + return oPos; + } +} + +typedef struct { + U32 idx; /* Index in array of ZSTD_Sequence */ + U32 posInSequence; /* Position within sequence at idx */ + size_t posInSrc; /* Number of bytes given by sequences provided so far */ +} ZSTD_sequencePosition; + +/* Returns a ZSTD error code if sequence is not valid */ +static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength, + size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) { + size_t offsetBound; + U32 windowSize = 1 << windowLog; + /* posInSrc represents the amount of data the the decoder would decode up to this point. + * As long as the amount of data decoded is less than or equal to window size, offsets may be + * larger than the total length of output decoded in order to reference the dict, even larger than + * window size. After output surpasses windowSize, we're limited to windowSize offsets again. + */ + offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize; + RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!"); + RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small"); + return 0; +} + +/* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */ +static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) { + U32 offCode = rawOffset + ZSTD_REP_MOVE; + U32 repCode = 0; + + if (!ll0 && rawOffset == rep[0]) { + repCode = 1; + } else if (rawOffset == rep[1]) { + repCode = 2 - ll0; + } else if (rawOffset == rep[2]) { + repCode = 3 - ll0; + } else if (ll0 && rawOffset == rep[0] - 1) { + repCode = 3; + } + if (repCode) { + /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */ + offCode = repCode - 1; + } + return offCode; +} + +/* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of + * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter. + */ +static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + BYTE const* ip = (BYTE const*)(src); + const BYTE* const iend = ip + blockSize; + repcodes_t updatedRepcodes; + U32 dictSize; + U32 litLength; + U32 matchLength; + U32 ll0; + U32 offCode; + + if (cctx->cdict) { + dictSize = (U32)cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = (U32)cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) { + litLength = inSeqs[idx].litLength; + matchLength = inSeqs[idx].matchLength; + ll0 = litLength == 0; + offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + if (inSeqs[idx].litLength) { + DEBUGLOG(6, "Storing last literals of size: %u", inSeqs[idx].litLength); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, inSeqs[idx].litLength); + ip += inSeqs[idx].litLength; + seqPos->posInSrc += inSeqs[idx].litLength; + } + RETURN_ERROR_IF(ip != iend, corruption_detected, "Blocksize doesn't agree with block delimiter!"); + seqPos->idx = idx+1; + return 0; +} + +/* Returns the number of bytes to move the current read position back by. Only non-zero + * if we ended up splitting a sequence. Otherwise, it may return a ZSTD error if something + * went wrong. + * + * This function will attempt to scan through blockSize bytes represented by the sequences + * in inSeqs, storing any (partial) sequences. + * + * Occasionally, we may want to change the actual number of bytes we consumed from inSeqs to + * avoid splitting a match, or to avoid splitting a match such that it would produce a match + * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block. + */ +static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize) { + U32 idx = seqPos->idx; + U32 startPosInSequence = seqPos->posInSequence; + U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize; + size_t dictSize; + BYTE const* ip = (BYTE const*)(src); + BYTE const* iend = ip + blockSize; /* May be adjusted if we decide to process fewer than blockSize bytes */ + repcodes_t updatedRepcodes; + U32 bytesAdjustment = 0; + U32 finalMatchSplit = 0; + U32 litLength; + U32 matchLength; + U32 rawOffset; + U32 offCode; + + if (cctx->cdict) { + dictSize = cctx->cdict->dictContentSize; + } else if (cctx->prefixDict.dict) { + dictSize = cctx->prefixDict.dictSize; + } else { + dictSize = 0; + } + DEBUGLOG(5, "ZSTD_copySequencesToSeqStore: idx: %u PIS: %u blockSize: %zu", idx, startPosInSequence, blockSize); + DEBUGLOG(5, "Start seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t)); + while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) { + const ZSTD_Sequence currSeq = inSeqs[idx]; + litLength = currSeq.litLength; + matchLength = currSeq.matchLength; + rawOffset = currSeq.offset; + + /* Modify the sequence depending on where endPosInSequence lies */ + if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) { + if (startPosInSequence >= litLength) { + startPosInSequence -= litLength; + litLength = 0; + matchLength -= startPosInSequence; + } else { + litLength -= startPosInSequence; + } + /* Move to the next sequence */ + endPosInSequence -= currSeq.litLength + currSeq.matchLength; + startPosInSequence = 0; + idx++; + } else { + /* This is the final (partial) sequence we're adding from inSeqs, and endPosInSequence + does not reach the end of the match. So, we have to split the sequence */ + DEBUGLOG(6, "Require a split: diff: %u, idx: %u PIS: %u", + currSeq.litLength + currSeq.matchLength - endPosInSequence, idx, endPosInSequence); + if (endPosInSequence > litLength) { + U32 firstHalfMatchLength; + litLength = startPosInSequence >= litLength ? 0 : litLength - startPosInSequence; + firstHalfMatchLength = endPosInSequence - startPosInSequence - litLength; + if (matchLength > blockSize && firstHalfMatchLength >= cctx->appliedParams.cParams.minMatch) { + /* Only ever split the match if it is larger than the block size */ + U32 secondHalfMatchLength = currSeq.matchLength + currSeq.litLength - endPosInSequence; + if (secondHalfMatchLength < cctx->appliedParams.cParams.minMatch) { + /* Move the endPosInSequence backward so that it creates match of minMatch length */ + endPosInSequence -= cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + bytesAdjustment = cctx->appliedParams.cParams.minMatch - secondHalfMatchLength; + firstHalfMatchLength -= bytesAdjustment; + } + matchLength = firstHalfMatchLength; + /* Flag that we split the last match - after storing the sequence, exit the loop, + but keep the value of endPosInSequence */ + finalMatchSplit = 1; + } else { + /* Move the position in sequence backwards so that we don't split match, and break to store + * the last literals. We use the original currSeq.litLength as a marker for where endPosInSequence + * should go. We prefer to do this whenever it is not necessary to split the match, or if doing so + * would cause the first half of the match to be too small + */ + bytesAdjustment = endPosInSequence - currSeq.litLength; + endPosInSequence = currSeq.litLength; + break; + } + } else { + /* This sequence ends inside the literals, break to store the last literals */ + break; + } + } + /* Check if this offset can be represented with a repcode */ + { U32 ll0 = (litLength == 0); + offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0); + updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0); + } + + if (cctx->appliedParams.validateSequences) { + seqPos->posInSrc += litLength + matchLength; + FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc, + cctx->appliedParams.cParams.windowLog, dictSize, + cctx->appliedParams.cParams.minMatch), + "Sequence validation failed"); + } + DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength); + RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation, + "Not enough memory allocated. Try adjusting ZSTD_c_minMatch."); + ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH); + ip += matchLength + litLength; + } + DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength); + assert(idx == inSeqsSize || endPosInSequence <= inSeqs[idx].litLength + inSeqs[idx].matchLength); + seqPos->idx = idx; + seqPos->posInSequence = endPosInSequence; + ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t)); + + iend -= bytesAdjustment; + if (ip != iend) { + /* Store any last literals */ + U32 lastLLSize = (U32)(iend - ip); + assert(ip <= iend); + DEBUGLOG(6, "Storing last literals of size: %u", lastLLSize); + ZSTD_storeLastLiterals(&cctx->seqStore, ip, lastLLSize); + seqPos->posInSrc += lastLLSize; + } + + return bytesAdjustment; +} + +typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos, + const ZSTD_Sequence* const inSeqs, size_t inSeqsSize, + const void* src, size_t blockSize); +static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) { + ZSTD_sequenceCopier sequenceCopier = NULL; + assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode)); + if (mode == ZSTD_sf_explicitBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreExplicitBlockDelim; + } else if (mode == ZSTD_sf_noBlockDelimiters) { + return ZSTD_copySequencesToSeqStoreNoBlockDelim; + } + assert(sequenceCopier != NULL); + return sequenceCopier; +} + +/* Compress, block-by-block, all of the sequences given. + * + * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error. + */ +static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + size_t cSize = 0; + U32 lastBlock; + size_t blockSize; + size_t compressedSeqsSize; + size_t remaining = srcSize; + ZSTD_sequencePosition seqPos = {0, 0, 0}; + + BYTE const* ip = (BYTE const*)src; + BYTE* op = (BYTE*)dst; + ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters); + + DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize); + /* Special case: empty frame */ + if (remaining == 0) { + U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "No room for empty frame block header"); + MEM_writeLE32(op, cBlockHeader24); + op += ZSTD_blockHeaderSize; + dstCapacity -= ZSTD_blockHeaderSize; + cSize += ZSTD_blockHeaderSize; + } + + while (remaining) { + size_t cBlockSize; + size_t additionalByteAdjustment; + lastBlock = remaining <= cctx->blockSize; + blockSize = lastBlock ? (U32)remaining : (U32)cctx->blockSize; + ZSTD_resetSeqStore(&cctx->seqStore); + DEBUGLOG(4, "Working on new block. Blocksize: %zu", blockSize); + + additionalByteAdjustment = sequenceCopier(cctx, &seqPos, inSeqs, inSeqsSize, ip, blockSize); + FORWARD_IF_ERROR(additionalByteAdjustment, "Bad sequence copy"); + blockSize -= additionalByteAdjustment; + + /* If blocks are too small, emit as a nocompress block */ + if (blockSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Block too small, writing out nocompress block: cSize: %zu", cBlockSize); + cSize += cBlockSize; + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + continue; + } + + compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore, + &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy, + &cctx->appliedParams, + op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize, + blockSize, + cctx->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */, + cctx->bmi2); + FORWARD_IF_ERROR(compressedSeqsSize, "Compressing sequences of block failed"); + DEBUGLOG(4, "Compressed sequences size: %zu", compressedSeqsSize); + + if (!cctx->isFirstBlock && + ZSTD_maybeRLE(&cctx->seqStore) && + ZSTD_isRLE((BYTE const*)src, srcSize)) { + /* We don't want to emit our first block as a RLE even if it qualifies because + * doing so will cause the decoder (cli only) to throw a "should consume all input error." + * This is only an issue for zstd <= v1.4.3 + */ + compressedSeqsSize = 1; + } + + if (compressedSeqsSize == 0) { + /* ZSTD_noCompressBlock writes the block header as well */ + cBlockSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "Nocompress block failed"); + DEBUGLOG(4, "Writing out nocompress block, size: %zu", cBlockSize); + } else if (compressedSeqsSize == 1) { + cBlockSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, blockSize, lastBlock); + FORWARD_IF_ERROR(cBlockSize, "RLE compress block failed"); + DEBUGLOG(4, "Writing out RLE block, size: %zu", cBlockSize); + } else { + U32 cBlockHeader; + /* Error checking and repcodes update */ + ZSTD_confirmRepcodesAndEntropyTables(cctx); + if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid) + cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check; + + /* Write block header into beginning of block*/ + cBlockHeader = lastBlock + (((U32)bt_compressed)<<1) + (U32)(compressedSeqsSize << 3); + MEM_writeLE24(op, cBlockHeader); + cBlockSize = ZSTD_blockHeaderSize + compressedSeqsSize; + DEBUGLOG(4, "Writing out compressed block, size: %zu", cBlockSize); + } + + cSize += cBlockSize; + DEBUGLOG(4, "cSize running total: %zu", cSize); + + if (lastBlock) { + break; + } else { + ip += blockSize; + op += cBlockSize; + remaining -= blockSize; + dstCapacity -= cBlockSize; + cctx->isFirstBlock = 0; + } + } + + return cSize; +} + +size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity, + const ZSTD_Sequence* inSeqs, size_t inSeqsSize, + const void* src, size_t srcSize) { + BYTE* op = (BYTE*)dst; + size_t cSize = 0; + size_t compressedBlocksSize = 0; + size_t frameHeaderSize = 0; + + /* Transparent initialization stage, same as compressStream2() */ + DEBUGLOG(3, "ZSTD_compressSequences()"); + assert(cctx != NULL); + FORWARD_IF_ERROR(ZSTD_CCtx_init_compressStream2(cctx, ZSTD_e_end, srcSize), "CCtx initialization failed"); + /* Begin writing output, starting with frame header */ + frameHeaderSize = ZSTD_writeFrameHeader(op, dstCapacity, &cctx->appliedParams, srcSize, cctx->dictID); + op += frameHeaderSize; + dstCapacity -= frameHeaderSize; + cSize += frameHeaderSize; + if (cctx->appliedParams.fParams.checksumFlag && srcSize) { + xxh64_update(&cctx->xxhState, src, srcSize); + } + /* cSize includes block header size and compressed sequences size */ + compressedBlocksSize = ZSTD_compressSequences_internal(cctx, + op, dstCapacity, + inSeqs, inSeqsSize, + src, srcSize); + FORWARD_IF_ERROR(compressedBlocksSize, "Compressing blocks failed!"); + cSize += compressedBlocksSize; + dstCapacity -= compressedBlocksSize; + + if (cctx->appliedParams.fParams.checksumFlag) { + U32 const checksum = (U32) xxh64_digest(&cctx->xxhState); + RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for checksum"); + DEBUGLOG(4, "Write checksum : %08X", (unsigned)checksum); + MEM_writeLE32((char*)dst + cSize, checksum); + cSize += 4; + } + + DEBUGLOG(3, "Final compressed size: %zu", cSize); + return cSize; +} + +/*====== Finalize ======*/ + +/*! ZSTD_flushStream() : + * @return : amount of data remaining to flush */ +size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + return ZSTD_compressStream2(zcs, output, &input, ZSTD_e_flush); +} + + +size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output) +{ + ZSTD_inBuffer input = { NULL, 0, 0 }; + size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); + FORWARD_IF_ERROR( remainingToFlush , "ZSTD_compressStream2 failed"); + if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ + /* single thread mode : attempt to calculate remaining to flush more precisely */ + { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; + size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); + size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; + DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); + return toFlush; + } +} + + +/*-===== Pre-defined compression levels =====-*/ + +#define ZSTD_MAX_CLEVEL 22 +int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; } +int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } + +static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { +{ /* "default" - for any srcSize > 256 KB */ + /* W, C, H, S, L, TL, strat */ + { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ + { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ + { 20, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */ + { 21, 16, 17, 1, 5, 0, ZSTD_dfast }, /* level 3 */ + { 21, 18, 18, 1, 5, 0, ZSTD_dfast }, /* level 4 */ + { 21, 18, 19, 2, 5, 2, ZSTD_greedy }, /* level 5 */ + { 21, 19, 19, 3, 5, 4, ZSTD_greedy }, /* level 6 */ + { 21, 19, 19, 3, 5, 8, ZSTD_lazy }, /* level 7 */ + { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */ + { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */ + { 22, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */ + { 22, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */ + { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */ + { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 13 */ + { 22, 22, 23, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */ + { 22, 23, 23, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */ + { 22, 22, 22, 5, 5, 48, ZSTD_btopt }, /* level 16 */ + { 23, 23, 22, 5, 4, 64, ZSTD_btopt }, /* level 17 */ + { 23, 23, 22, 6, 3, 64, ZSTD_btultra }, /* level 18 */ + { 23, 24, 22, 7, 3,256, ZSTD_btultra2}, /* level 19 */ + { 25, 25, 23, 7, 3,256, ZSTD_btultra2}, /* level 20 */ + { 26, 26, 24, 7, 3,512, ZSTD_btultra2}, /* level 21 */ + { 27, 27, 25, 9, 3,999, ZSTD_btultra2}, /* level 22 */ +}, +{ /* for srcSize <= 256 KB */ + /* W, C, H, S, L, T, strat */ + { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 18, 14, 14, 1, 5, 0, ZSTD_dfast }, /* level 2 */ + { 18, 16, 16, 1, 4, 0, ZSTD_dfast }, /* level 3 */ + { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/ + { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/ + { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/ + { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */ + { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 18, 18, 19, 5, 4, 12, ZSTD_btlazy2 }, /* level 11.*/ + { 18, 19, 19, 7, 4, 12, ZSTD_btlazy2 }, /* level 12.*/ + { 18, 18, 19, 4, 4, 16, ZSTD_btopt }, /* level 13 */ + { 18, 18, 19, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 18, 18, 19, 6, 3,128, ZSTD_btopt }, /* level 15.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 18, 19, 19, 6, 3,128, ZSTD_btultra2}, /* level 18.*/ + { 18, 19, 19, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 18, 19, 19, 10, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 18, 19, 19, 12, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 18, 19, 19, 13, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 128 KB */ + /* W, C, H, S, L, T, strat */ + { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */ + { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */ + { 17, 15, 16, 2, 5, 0, ZSTD_dfast }, /* level 3 */ + { 17, 17, 17, 2, 4, 0, ZSTD_dfast }, /* level 4 */ + { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */ + { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */ + { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */ + { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */ + { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */ + { 17, 17, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 11 */ + { 17, 18, 17, 7, 4, 12, ZSTD_btlazy2 }, /* level 12 */ + { 17, 18, 17, 3, 4, 12, ZSTD_btopt }, /* level 13.*/ + { 17, 18, 17, 4, 3, 32, ZSTD_btopt }, /* level 14.*/ + { 17, 18, 17, 6, 3,256, ZSTD_btopt }, /* level 15.*/ + { 17, 18, 17, 6, 3,128, ZSTD_btultra }, /* level 16.*/ + { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 17.*/ + { 17, 18, 17, 10, 3,512, ZSTD_btultra }, /* level 18.*/ + { 17, 18, 17, 5, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 17, 18, 17, 7, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 17, 18, 17, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 17, 18, 17, 11, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +{ /* for srcSize <= 16 KB */ + /* W, C, H, S, L, T, strat */ + { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */ + { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */ + { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */ + { 14, 14, 15, 2, 4, 0, ZSTD_dfast }, /* level 3 */ + { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4 */ + { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/ + { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */ + { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */ + { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/ + { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/ + { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/ + { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/ + { 14, 15, 14, 4, 3, 24, ZSTD_btopt }, /* level 12.*/ + { 14, 15, 14, 5, 3, 32, ZSTD_btultra }, /* level 13.*/ + { 14, 15, 15, 6, 3, 64, ZSTD_btultra }, /* level 14.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra }, /* level 15.*/ + { 14, 15, 15, 5, 3, 48, ZSTD_btultra2}, /* level 16.*/ + { 14, 15, 15, 6, 3,128, ZSTD_btultra2}, /* level 17.*/ + { 14, 15, 15, 7, 3,256, ZSTD_btultra2}, /* level 18.*/ + { 14, 15, 15, 8, 3,256, ZSTD_btultra2}, /* level 19.*/ + { 14, 15, 15, 8, 3,512, ZSTD_btultra2}, /* level 20.*/ + { 14, 15, 15, 9, 3,512, ZSTD_btultra2}, /* level 21.*/ + { 14, 15, 15, 10, 3,999, ZSTD_btultra2}, /* level 22.*/ +}, +}; + +static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize) +{ + ZSTD_compressionParameters cParams = ZSTD_getCParams_internal(compressionLevel, 0, dictSize, ZSTD_cpm_createCDict); + switch (cParams.strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams.hashLog += ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } + return cParams; +} + +static int ZSTD_dedicatedDictSearch_isSupported( + ZSTD_compressionParameters const* cParams) +{ + return (cParams->strategy >= ZSTD_greedy) + && (cParams->strategy <= ZSTD_lazy2) + && (cParams->hashLog >= cParams->chainLog) + && (cParams->chainLog <= 24); +} + +/** + * Reverses the adjustment applied to cparams when enabling dedicated dict + * search. This is used to recover the params set to be used in the working + * context. (Otherwise, those tables would also grow.) + */ +static void ZSTD_dedicatedDictSearch_revertCParams( + ZSTD_compressionParameters* cParams) { + switch (cParams->strategy) { + case ZSTD_fast: + case ZSTD_dfast: + break; + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG; + break; + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + } +} + +static U64 ZSTD_getCParamRowSize(U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + switch (mode) { + case ZSTD_cpm_unknown: + case ZSTD_cpm_noAttachDict: + case ZSTD_cpm_createCDict: + break; + case ZSTD_cpm_attachDict: + dictSize = 0; + break; + default: + assert(0); + break; + } + { int const unknown = srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN; + size_t const addedSize = unknown && dictSize > 0 ? 500 : 0; + return unknown && dictSize == 0 ? ZSTD_CONTENTSIZE_UNKNOWN : srcSizeHint+dictSize+addedSize; + } +} + +/*! ZSTD_getCParams_internal() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Note: srcSizeHint 0 means 0, use ZSTD_CONTENTSIZE_UNKNOWN for unknown. + * Use dictSize == 0 for unknown or unused. + * Note: `mode` controls how we treat the `dictSize`. See docs for `ZSTD_cParamMode_e`. */ +static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) +{ + U64 const rSize = ZSTD_getCParamRowSize(srcSizeHint, dictSize, mode); + U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); + int row; + DEBUGLOG(5, "ZSTD_getCParams_internal (cLevel=%i)", compressionLevel); + + /* row */ + if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ + else if (compressionLevel < 0) row = 0; /* entry 0 is baseline for fast mode */ + else if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; + else row = compressionLevel; + + { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; + /* acceleration factor */ + if (compressionLevel < 0) { + int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel); + cp.targetLength = (unsigned)(-clampedCompressionLevel); + } + /* refine parameters based on srcSize & dictSize */ + return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize, mode); + } +} + +/*! ZSTD_getCParams() : + * @return ZSTD_compressionParameters structure for a selected compression level, srcSize and dictSize. + * Size values are optional, provide 0 if not known or unused */ +ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) +{ + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +static ZSTD_parameters ZSTD_getParams_internal(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode) { + ZSTD_parameters params; + ZSTD_compressionParameters const cParams = ZSTD_getCParams_internal(compressionLevel, srcSizeHint, dictSize, mode); + DEBUGLOG(5, "ZSTD_getParams (cLevel=%i)", compressionLevel); + ZSTD_memset(¶ms, 0, sizeof(params)); + params.cParams = cParams; + params.fParams.contentSizeFlag = 1; + return params; +} + +/*! ZSTD_getParams() : + * same idea as ZSTD_getCParams() + * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). + * Fields of `ZSTD_frameParameters` are set to default values */ +ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { + if (srcSizeHint == 0) srcSizeHint = ZSTD_CONTENTSIZE_UNKNOWN; + return ZSTD_getParams_internal(compressionLevel, srcSizeHint, dictSize, ZSTD_cpm_unknown); +} diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h new file mode 100644 index 000000000000..b56c482322ba --- /dev/null +++ b/lib/zstd/compress/zstd_compress_internal.h @@ -0,0 +1,1188 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* This header contains definitions + * that shall **only** be used by modules within lib/compress. + */ + +#ifndef ZSTD_COMPRESS_H +#define ZSTD_COMPRESS_H + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_internal.h" +#include "zstd_cwksp.h" + + +/*-************************************* +* Constants +***************************************/ +#define kSearchStrength 8 +#define HASH_READ_SIZE 8 +#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". + It could be confused for a real successor at index "1", if sorted as larger than its predecessor. + It's not a big deal though : candidate will just be sorted again. + Additionally, candidate position 1 will be lost. + But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. + The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. + This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ + + +/*-************************************* +* Context memory management +***************************************/ +typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e; +typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage; + +typedef struct ZSTD_prefixDict_s { + const void* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; +} ZSTD_prefixDict; + +typedef struct { + void* dictBuffer; + void const* dict; + size_t dictSize; + ZSTD_dictContentType_e dictContentType; + ZSTD_CDict* cdict; +} ZSTD_localDict; + +typedef struct { + HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)]; + HUF_repeat repeatMode; +} ZSTD_hufCTables_t; + +typedef struct { + FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; + FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; + FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; + FSE_repeat offcode_repeatMode; + FSE_repeat matchlength_repeatMode; + FSE_repeat litlength_repeatMode; +} ZSTD_fseCTables_t; + +typedef struct { + ZSTD_hufCTables_t huf; + ZSTD_fseCTables_t fse; +} ZSTD_entropyCTables_t; + +typedef struct { + U32 off; /* Offset code (offset + ZSTD_REP_MOVE) for the match */ + U32 len; /* Raw length of match */ +} ZSTD_match_t; + +typedef struct { + U32 offset; /* Offset of sequence */ + U32 litLength; /* Length of literals prior to match */ + U32 matchLength; /* Raw length of match */ +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The index in seq where reading stopped. pos <= size. */ + size_t posInSequence; /* The position within the sequence at seq[pos] where reading + stopped. posInSequence <= seq[pos].litLength + seq[pos].matchLength */ + size_t size; /* The number of sequences. <= capacity. */ + size_t capacity; /* The capacity starting from `seq` pointer */ +} rawSeqStore_t; + +UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0}; + +typedef struct { + int price; + U32 off; + U32 mlen; + U32 litlen; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_optimal_t; + +typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e; + +typedef struct { + /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */ + unsigned* litFreq; /* table of literals statistics, of size 256 */ + unsigned* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */ + unsigned* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */ + unsigned* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */ + ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */ + ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */ + + U32 litSum; /* nb of literals */ + U32 litLengthSum; /* nb of litLength codes */ + U32 matchLengthSum; /* nb of matchLength codes */ + U32 offCodeSum; /* nb of offset codes */ + U32 litSumBasePrice; /* to compare to log2(litfreq) */ + U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */ + U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */ + U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ + ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ + const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ + ZSTD_literalCompressionMode_e literalCompressionMode; +} optState_t; + +typedef struct { + ZSTD_entropyCTables_t entropy; + U32 rep[ZSTD_REP_NUM]; +} ZSTD_compressedBlockState_t; + +typedef struct { + BYTE const* nextSrc; /* next block here to continue on current prefix */ + BYTE const* base; /* All regular indexes relative to this position */ + BYTE const* dictBase; /* extDict indexes relative to this position */ + U32 dictLimit; /* below that point, need extDict */ + U32 lowLimit; /* below that point, no more valid data */ +} ZSTD_window_t; + +typedef struct ZSTD_matchState_t ZSTD_matchState_t; +struct ZSTD_matchState_t { + ZSTD_window_t window; /* State for window round buffer management */ + U32 loadedDictEnd; /* index of end of dictionary, within context's referential. + * When loadedDictEnd != 0, a dictionary is in use, and still valid. + * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. + * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). + * When dict referential is copied into active context (i.e. not attached), + * loadedDictEnd == dictSize, since referential starts from zero. + */ + U32 nextToUpdate; /* index from which to continue table update */ + U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ + U32* hashTable; + U32* hashTable3; + U32* chainTable; + int dedicatedDictSearch; /* Indicates whether this matchState is using the + * dedicated dictionary search structure. + */ + optState_t opt; /* optimal parser state */ + const ZSTD_matchState_t* dictMatchState; + ZSTD_compressionParameters cParams; + const rawSeqStore_t* ldmSeqStore; +}; + +typedef struct { + ZSTD_compressedBlockState_t* prevCBlock; + ZSTD_compressedBlockState_t* nextCBlock; + ZSTD_matchState_t matchState; +} ZSTD_blockState_t; + +typedef struct { + U32 offset; + U32 checksum; +} ldmEntry_t; + +typedef struct { + BYTE const* split; + U32 hash; + U32 checksum; + ldmEntry_t* bucket; +} ldmMatchCandidate_t; + +#define LDM_BATCH_SIZE 64 + +typedef struct { + ZSTD_window_t window; /* State for the window round buffer management */ + ldmEntry_t* hashTable; + U32 loadedDictEnd; + BYTE* bucketOffsets; /* Next position in bucket to insert entry */ + size_t splitIndices[LDM_BATCH_SIZE]; + ldmMatchCandidate_t matchCandidates[LDM_BATCH_SIZE]; +} ldmState_t; + +typedef struct { + U32 enableLdm; /* 1 if enable long distance matching */ + U32 hashLog; /* Log size of hashTable */ + U32 bucketSizeLog; /* Log bucket size for collision resolution, at most 8 */ + U32 minMatchLength; /* Minimum match length */ + U32 hashRateLog; /* Log number of entries to skip */ + U32 windowLog; /* Window log for the LDM */ +} ldmParams_t; + +typedef struct { + int collectSequences; + ZSTD_Sequence* seqStart; + size_t seqIndex; + size_t maxSequences; +} SeqCollector; + +struct ZSTD_CCtx_params_s { + ZSTD_format_e format; + ZSTD_compressionParameters cParams; + ZSTD_frameParameters fParams; + + int compressionLevel; + int forceWindow; /* force back-references to respect limit of + * 1< 63) ? ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; +} + +/* ZSTD_MLcode() : + * note : mlBase = matchLength - MINMATCH; + * because it's the format it's stored in seqStore->sequences */ +MEM_STATIC U32 ZSTD_MLcode(U32 mlBase) +{ + static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 }; + static const U32 ML_deltaCode = 36; + return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; +} + +typedef struct repcodes_s { + U32 rep[3]; +} repcodes_t; + +MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0) +{ + repcodes_t newReps; + if (offset >= ZSTD_REP_NUM) { /* full offset */ + newReps.rep[2] = rep[1]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = offset - ZSTD_REP_MOVE; + } else { /* repcode */ + U32 const repCode = offset + ll0; + if (repCode > 0) { /* note : if repCode==0, no change */ + U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2]; + newReps.rep[1] = rep[0]; + newReps.rep[0] = currentOffset; + } else { /* repCode == 0 */ + ZSTD_memcpy(&newReps, rep, sizeof(newReps)); + } + } + return newReps; +} + +/* ZSTD_cParam_withinBounds: + * @return 1 if value is within cParam bounds, + * 0 otherwise */ +MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +/* ZSTD_noCompressBlock() : + * Writes uncompressed block to dst buffer from given src. + * Returns the size of the block */ +MEM_STATIC size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) +{ + U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, + dstSize_tooSmall, "dst buf too small for uncompressed block"); + MEM_writeLE24(dst, cBlockHeader24); + ZSTD_memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); + return ZSTD_blockHeaderSize + srcSize; +} + +MEM_STATIC size_t ZSTD_rleCompressBlock (void* dst, size_t dstCapacity, BYTE src, size_t srcSize, U32 lastBlock) +{ + BYTE* const op = (BYTE*)dst; + U32 const cBlockHeader = lastBlock + (((U32)bt_rle)<<1) + (U32)(srcSize << 3); + RETURN_ERROR_IF(dstCapacity < 4, dstSize_tooSmall, ""); + MEM_writeLE24(op, cBlockHeader); + op[3] = src; + return 4; +} + + +/* ZSTD_minGain() : + * minimum compression required + * to generate a compress block or a compressed literals section. + * note : use same formula for both situations */ +MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) +{ + U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; + ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); + assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); + return (srcSize >> minlog) + 2; +} + +MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) +{ + switch (cctxParams->literalCompressionMode) { + case ZSTD_lcm_huffman: + return 0; + case ZSTD_lcm_uncompressed: + return 1; + default: + assert(0 /* impossible: pre-validated */); + /* fall-through */ + case ZSTD_lcm_auto: + return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); + } +} + +/*! ZSTD_safecopyLiterals() : + * memcpy() function that won't read beyond more than WILDCOPY_OVERLENGTH bytes past ilimit_w. + * Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single + * large copies. + */ +static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) { + assert(iend > ilimit_w); + if (ip <= ilimit_w) { + ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap); + op += ilimit_w - ip; + ip = ilimit_w; + } + while (ip < iend) *op++ = *ip++; +} + +/*! ZSTD_storeSeq() : + * Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t. + * `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes). + * `mlBase` : matchLength - MINMATCH + * Allowed to overread literals up to litLimit. +*/ +HINT_INLINE UNUSED_ATTR +void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase) +{ + BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH; + BYTE const* const litEnd = literals + litLength; +#if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6) + static const BYTE* g_start = NULL; + if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */ + { U32 const pos = (U32)((const BYTE*)literals - g_start); + DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u", + pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode); + } +#endif + assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq); + /* copy Literals */ + assert(seqStorePtr->maxNbLit <= 128 KB); + assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); + assert(literals + litLength <= litLimit); + if (litEnd <= litLimit_w) { + /* Common case we can use wildcopy. + * First copy 16 bytes, because literals are likely short. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(seqStorePtr->lit, literals); + if (litLength > 16) { + ZSTD_wildcopy(seqStorePtr->lit+16, literals+16, (ptrdiff_t)litLength-16, ZSTD_no_overlap); + } + } else { + ZSTD_safecopyLiterals(seqStorePtr->lit, literals, litEnd, litLimit_w); + } + seqStorePtr->lit += litLength; + + /* literal Length */ + if (litLength>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 1; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].litLength = (U16)litLength; + + /* match offset */ + seqStorePtr->sequences[0].offset = offCode + 1; + + /* match Length */ + if (mlBase>0xFFFF) { + assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */ + seqStorePtr->longLengthID = 2; + seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); + } + seqStorePtr->sequences[0].matchLength = (U16)mlBase; + + seqStorePtr->sequences++; +} + + +/*-************************************* +* Match length counter +***************************************/ +static unsigned ZSTD_NbCommonBytes (size_t val) +{ + if (MEM_isLittleEndian()) { + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return (__builtin_ctzll((U64)val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, + 0, 3, 1, 3, 1, 4, 2, 7, + 0, 2, 3, 6, 1, 5, 3, 5, + 1, 3, 4, 4, 2, 5, 6, 7, + 7, 0, 1, 2, 3, 3, 4, 6, + 2, 6, 5, 5, 3, 4, 5, 6, + 7, 1, 2, 4, 6, 4, 4, 5, + 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif + } else { /* 32 bits */ +# if (__GNUC__ >= 3) + return (__builtin_ctz((U32)val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, + 3, 2, 2, 1, 3, 2, 0, 1, + 3, 3, 1, 2, 2, 2, 2, 0, + 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif + } + } else { /* Big Endian CPU */ + if (MEM_64bits()) { +# if (__GNUC__ >= 4) + return (__builtin_clzll(val) >> 3); +# else + unsigned r; + const unsigned n32 = sizeof(size_t)*4; /* calculate this way due to compiler complaining in 32-bits mode */ + if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif + } else { /* 32 bits */ +# if (__GNUC__ >= 3) + return (__builtin_clz((U32)val) >> 3); +# else + unsigned r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif + } } +} + + +MEM_STATIC size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit) +{ + const BYTE* const pStart = pIn; + const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1); + + if (pIn < pInLoopLimit) { + { size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (diff) return ZSTD_NbCommonBytes(diff); } + pIn+=sizeof(size_t); pMatch+=sizeof(size_t); + while (pIn < pInLoopLimit) { + size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn); + if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; } + pIn += ZSTD_NbCommonBytes(diff); + return (size_t)(pIn - pStart); + } } + if (MEM_64bits() && (pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; } + if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; } + if ((pIn> (32-h) ; } +MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */ + +static const U32 prime4bytes = 2654435761U; +static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; } +static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); } + +static const U64 prime5bytes = 889523592379ULL; +static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; } +static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); } + +static const U64 prime6bytes = 227718039650203ULL; +static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; } +static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); } + +static const U64 prime7bytes = 58295818150454627ULL; +static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; } +static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); } + +static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL; +static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; } +static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); } + +MEM_STATIC FORCE_INLINE_ATTR +size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls) +{ + switch(mls) + { + default: + case 4: return ZSTD_hash4Ptr(p, hBits); + case 5: return ZSTD_hash5Ptr(p, hBits); + case 6: return ZSTD_hash6Ptr(p, hBits); + case 7: return ZSTD_hash7Ptr(p, hBits); + case 8: return ZSTD_hash8Ptr(p, hBits); + } +} + +/** ZSTD_ipow() : + * Return base^exponent. + */ +static U64 ZSTD_ipow(U64 base, U64 exponent) +{ + U64 power = 1; + while (exponent) { + if (exponent & 1) power *= base; + exponent >>= 1; + base *= base; + } + return power; +} + +#define ZSTD_ROLL_HASH_CHAR_OFFSET 10 + +/** ZSTD_rollingHash_append() : + * Add the buffer to the hash value. + */ +static U64 ZSTD_rollingHash_append(U64 hash, void const* buf, size_t size) +{ + BYTE const* istart = (BYTE const*)buf; + size_t pos; + for (pos = 0; pos < size; ++pos) { + hash *= prime8bytes; + hash += istart[pos] + ZSTD_ROLL_HASH_CHAR_OFFSET; + } + return hash; +} + +/** ZSTD_rollingHash_compute() : + * Compute the rolling hash value of the buffer. + */ +MEM_STATIC U64 ZSTD_rollingHash_compute(void const* buf, size_t size) +{ + return ZSTD_rollingHash_append(0, buf, size); +} + +/** ZSTD_rollingHash_primePower() : + * Compute the primePower to be passed to ZSTD_rollingHash_rotate() for a hash + * over a window of length bytes. + */ +MEM_STATIC U64 ZSTD_rollingHash_primePower(U32 length) +{ + return ZSTD_ipow(prime8bytes, length - 1); +} + +/** ZSTD_rollingHash_rotate() : + * Rotate the rolling hash by one byte. + */ +MEM_STATIC U64 ZSTD_rollingHash_rotate(U64 hash, BYTE toRemove, BYTE toAdd, U64 primePower) +{ + hash -= (toRemove + ZSTD_ROLL_HASH_CHAR_OFFSET) * primePower; + hash *= prime8bytes; + hash += toAdd + ZSTD_ROLL_HASH_CHAR_OFFSET; + return hash; +} + +/*-************************************* +* Round buffer management +***************************************/ +#if (ZSTD_WINDOWLOG_MAX_64 > 31) +# error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" +#endif +/* Max current allowed */ +#define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) +/* Maximum chunk size before overflow correction needs to be called again */ +#define ZSTD_CHUNKSIZE_MAX \ + ( ((U32)-1) /* Maximum ending current index */ \ + - ZSTD_CURRENT_MAX) /* Maximum beginning lowLimit */ + +/** + * ZSTD_window_clear(): + * Clears the window containing the history by simply setting it to empty. + */ +MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window) +{ + size_t const endT = (size_t)(window->nextSrc - window->base); + U32 const end = (U32)endT; + + window->lowLimit = end; + window->dictLimit = end; +} + +/** + * ZSTD_window_hasExtDict(): + * Returns non-zero if the window has a non-empty extDict. + */ +MEM_STATIC U32 ZSTD_window_hasExtDict(ZSTD_window_t const window) +{ + return window.lowLimit < window.dictLimit; +} + +/** + * ZSTD_matchState_dictMode(): + * Inspects the provided matchState and figures out what dictMode should be + * passed to the compressor. + */ +MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms) +{ + return ZSTD_window_hasExtDict(ms->window) ? + ZSTD_extDict : + ms->dictMatchState != NULL ? + (ms->dictMatchState->dedicatedDictSearch ? ZSTD_dedicatedDictSearch : ZSTD_dictMatchState) : + ZSTD_noDict; +} + +/** + * ZSTD_window_needOverflowCorrection(): + * Returns non-zero if the indices are getting too large and need overflow + * protection. + */ +MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window, + void const* srcEnd) +{ + U32 const curr = (U32)((BYTE const*)srcEnd - window.base); + return curr > ZSTD_CURRENT_MAX; +} + +/** + * ZSTD_window_correctOverflow(): + * Reduces the indices to protect from index overflow. + * Returns the correction made to the indices, which must be applied to every + * stored index. + * + * The least significant cycleLog bits of the indices must remain the same, + * which may be 0. Every index up to maxDist in the past must be valid. + * NOTE: (maxDist & cycleMask) must be zero. + */ +MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog, + U32 maxDist, void const* src) +{ + /* preemptive overflow correction: + * 1. correction is large enough: + * lowLimit > (3<<29) ==> current > 3<<29 + 1< (3<<29 + 1< (3<<29) - (1< (3<<29) - (1<<30) (NOTE: chainLog <= 30) + * > 1<<29 + * + * 2. (ip+ZSTD_CHUNKSIZE_MAX - cctx->base) doesn't overflow: + * After correction, current is less than (1<base < 1<<32. + * 3. (cctx->lowLimit + 1< 3<<29 + 1<base); + U32 const currentCycle0 = curr & cycleMask; + /* Exclude zero so that newCurrent - maxDist >= 1. */ + U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0; + U32 const newCurrent = currentCycle1 + maxDist; + U32 const correction = curr - newCurrent; + assert((maxDist & cycleMask) == 0); + assert(curr > newCurrent); + /* Loose bound, should be around 1<<29 (see above) */ + assert(correction > 1<<28); + + window->base += correction; + window->dictBase += correction; + if (window->lowLimit <= correction) window->lowLimit = 1; + else window->lowLimit -= correction; + if (window->dictLimit <= correction) window->dictLimit = 1; + else window->dictLimit -= correction; + + /* Ensure we can still reference the full window. */ + assert(newCurrent >= maxDist); + assert(newCurrent - maxDist >= 1); + /* Ensure that lowLimit and dictLimit didn't underflow. */ + assert(window->lowLimit <= newCurrent); + assert(window->dictLimit <= newCurrent); + + DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction, + window->lowLimit); + return correction; +} + +/** + * ZSTD_window_enforceMaxDist(): + * Updates lowLimit so that: + * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd + * + * It ensures index is valid as long as index >= lowLimit. + * This must be called before a block compression call. + * + * loadedDictEnd is only defined if a dictionary is in use for current compression. + * As the name implies, loadedDictEnd represents the index at end of dictionary. + * The value lies within context's referential, it can be directly compared to blockEndIdx. + * + * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. + * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. + * This is because dictionaries are allowed to be referenced fully + * as long as the last byte of the dictionary is in the window. + * Once input has progressed beyond window size, dictionary cannot be referenced anymore. + * + * In normal dict mode, the dictionary lies between lowLimit and dictLimit. + * In dictMatchState mode, lowLimit and dictLimit are the same, + * and the dictionary is below them. + * forceWindow and dictMatchState are therefore incompatible. + */ +MEM_STATIC void +ZSTD_window_enforceMaxDist(ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; + DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + + /* - When there is no dictionary : loadedDictEnd == 0. + In which case, the test (blockEndIdx > maxDist) is merely to avoid + overflowing next operation `newLowLimit = blockEndIdx - maxDist`. + - When there is a standard dictionary : + Index referential is copied from the dictionary, + which means it starts from 0. + In which case, loadedDictEnd == dictSize, + and it makes sense to compare `blockEndIdx > maxDist + dictSize` + since `blockEndIdx` also starts from zero. + - When there is an attached dictionary : + loadedDictEnd is expressed within the referential of the context, + so it can be directly compared against blockEndIdx. + */ + if (blockEndIdx > maxDist + loadedDictEnd) { + U32 const newLowLimit = blockEndIdx - maxDist; + if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; + if (window->dictLimit < window->lowLimit) { + DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u", + (unsigned)window->dictLimit, (unsigned)window->lowLimit); + window->dictLimit = window->lowLimit; + } + /* On reaching window size, dictionaries are invalidated */ + if (loadedDictEndPtr) *loadedDictEndPtr = 0; + if (dictMatchStatePtr) *dictMatchStatePtr = NULL; + } +} + +/* Similar to ZSTD_window_enforceMaxDist(), + * but only invalidates dictionary + * when input progresses beyond window size. + * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) + * loadedDictEnd uses same referential as window->base + * maxDist is the window size */ +MEM_STATIC void +ZSTD_checkDictValidity(const ZSTD_window_t* window, + const void* blockEnd, + U32 maxDist, + U32* loadedDictEndPtr, + const ZSTD_matchState_t** dictMatchStatePtr) +{ + assert(loadedDictEndPtr != NULL); + assert(dictMatchStatePtr != NULL); + { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); + U32 const loadedDictEnd = *loadedDictEndPtr; + DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", + (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); + assert(blockEndIdx >= loadedDictEnd); + + if (blockEndIdx > loadedDictEnd + maxDist) { + /* On reaching window size, dictionaries are invalidated. + * For simplification, if window size is reached anywhere within next block, + * the dictionary is invalidated for the full block. + */ + DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); + *loadedDictEndPtr = 0; + *dictMatchStatePtr = NULL; + } else { + if (*loadedDictEndPtr != 0) { + DEBUGLOG(6, "dictionary considered valid for current block"); + } } } +} + +MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) { + ZSTD_memset(window, 0, sizeof(*window)); + window->base = (BYTE const*)""; + window->dictBase = (BYTE const*)""; + window->dictLimit = 1; /* start from 1, so that 1st position is valid */ + window->lowLimit = 1; /* it ensures first and later CCtx usages compress the same */ + window->nextSrc = window->base + 1; /* see issue #1241 */ +} + +/** + * ZSTD_window_update(): + * Updates the window by appending [src, src + srcSize) to the window. + * If it is not contiguous, the current prefix becomes the extDict, and we + * forget about the extDict. Handles overlap of the prefix and extDict. + * Returns non-zero if the segment is contiguous. + */ +MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window, + void const* src, size_t srcSize) +{ + BYTE const* const ip = (BYTE const*)src; + U32 contiguous = 1; + DEBUGLOG(5, "ZSTD_window_update"); + if (srcSize == 0) + return contiguous; + assert(window->base != NULL); + assert(window->dictBase != NULL); + /* Check if blocks follow each other */ + if (src != window->nextSrc) { + /* not contiguous */ + size_t const distanceFromBase = (size_t)(window->nextSrc - window->base); + DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit); + window->lowLimit = window->dictLimit; + assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */ + window->dictLimit = (U32)distanceFromBase; + window->dictBase = window->base; + window->base = ip - distanceFromBase; + /* ms->nextToUpdate = window->dictLimit; */ + if (window->dictLimit - window->lowLimit < HASH_READ_SIZE) window->lowLimit = window->dictLimit; /* too small extDict */ + contiguous = 0; + } + window->nextSrc = ip + srcSize; + /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */ + if ( (ip+srcSize > window->dictBase + window->lowLimit) + & (ip < window->dictBase + window->dictLimit)) { + ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase; + U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx; + window->lowLimit = lowLimitMax; + DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit); + } + return contiguous; +} + +/** + * Returns the lowest allowed match index. It may either be in the ext-dict or the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.lowLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary + * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't + * valid for the entire block. So this check is sufficient to find the lowest valid match index. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + +/** + * Returns the lowest allowed match index in the prefix. + */ +MEM_STATIC U32 ZSTD_getLowestPrefixIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog) +{ + U32 const maxDistance = 1U << windowLog; + U32 const lowestValid = ms->window.dictLimit; + U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + U32 const isDictionary = (ms->loadedDictEnd != 0); + /* When computing the lowest prefix index we need to take the dictionary into account to handle + * the edge case where the dictionary and the source are contiguous in memory. + */ + U32 const matchLowest = isDictionary ? lowestValid : withinWindow; + return matchLowest; +} + + + +/* debug functions */ +#if (DEBUGLEVEL>=2) + +MEM_STATIC double ZSTD_fWeight(U32 rawStat) +{ + U32 const fp_accuracy = 8; + U32 const fp_multiplier = (1 << fp_accuracy); + U32 const newStat = rawStat + 1; + U32 const hb = ZSTD_highbit32(newStat); + U32 const BWeight = hb * fp_multiplier; + U32 const FWeight = (newStat << fp_accuracy) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + fp_accuracy < 31); + return (double)weight / fp_multiplier; +} + +/* display a table content, + * listing each element, its frequency, and its predicted bit cost */ +MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max) +{ + unsigned u, sum; + for (u=0, sum=0; u<=max; u++) sum += table[u]; + DEBUGLOG(2, "total nb elts: %u", sum); + for (u=0; u<=max; u++) { + DEBUGLOG(2, "%2u: %5u (%.2f)", + u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) ); + } +} + +#endif + + + +/* =============================================================== + * Shared internal declarations + * These prototypes may be called from sources not in lib/compress + * =============================================================== */ + +/* ZSTD_loadCEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * return : size of dictionary header (size of magic number + dict ID + entropy tables) + * assumptions : magic number supposed already checked + * and dictSize >= 8 */ +size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace, + const void* const dict, size_t dictSize); + +void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs); + +/* ============================================================== + * Private declarations + * These prototypes shall only be called from within lib/compress + * ============================================================== */ + +/* ZSTD_getCParamsFromCCtxParams() : + * cParams are built depending on compressionLevel, src size hints, + * LDM and manually set compression parameters. + * Note: srcSizeHint == 0 means 0! + */ +ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams( + const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize, ZSTD_cParamMode_e mode); + +/*! ZSTD_initCStream_internal() : + * Private use only. Init streaming operation. + * expects params to be valid. + * must receive dict, or cdict, or none, but not both. + * @return : 0, or an error code */ +size_t ZSTD_initCStream_internal(ZSTD_CStream* zcs, + const void* dict, size_t dictSize, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, unsigned long long pledgedSrcSize); + +void ZSTD_resetSeqStore(seqStore_t* ssPtr); + +/*! ZSTD_getCParamsFromCDict() : + * as the name implies */ +ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); + +/* ZSTD_compressBegin_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx, + const void* dict, size_t dictSize, + ZSTD_dictContentType_e dictContentType, + ZSTD_dictTableLoadMethod_e dtlm, + const ZSTD_CDict* cdict, + const ZSTD_CCtx_params* params, + unsigned long long pledgedSrcSize); + +/* ZSTD_compress_advanced_internal() : + * Private use only. To be called from zstdmt_compress.c. */ +size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict,size_t dictSize, + const ZSTD_CCtx_params* params); + + +/* ZSTD_writeLastEmptyBlock() : + * output an empty Block with end-of-frame mark to complete a frame + * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) + * or an error code if `dstCapacity` is too small ( 1 */ +U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat); + +/** ZSTD_CCtx_trace() : + * Trace the end of a compression call. + */ +void ZSTD_CCtx_trace(ZSTD_CCtx* cctx, size_t extraCSize); + +#endif /* ZSTD_COMPRESS_H */ diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c new file mode 100644 index 000000000000..655bcda4d1f1 --- /dev/null +++ b/lib/zstd/compress/zstd_compress_literals.c @@ -0,0 +1,158 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_literals.h" + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, ""); + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ZSTD_memcpy(ostart + flSize, src, srcSize); + DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize)); + return srcSize + flSize; +} + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + BYTE* const ostart = (BYTE*)dst; + U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); + + (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ + + switch(flSize) + { + case 1: /* 2 - 1 - 5 */ + ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); + break; + case 2: /* 2 - 2 - 12 */ + MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); + break; + case 3: /* 2 - 2 - 20 */ + MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); + break; + default: /* not necessary : flSize is {1,2,3} */ + assert(0); + } + + ostart[flSize] = *(const BYTE*)src; + DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1); + return flSize+1; +} + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2) +{ + size_t const minGain = ZSTD_minGain(srcSize, strategy); + size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); + BYTE* const ostart = (BYTE*)dst; + U32 singleStream = srcSize < 256; + symbolEncodingType_e hType = set_compressed; + size_t cLitSize; + + DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)", + disableLiteralCompression, (U32)srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralCompression) + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + + RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); + { HUF_repeat repeat = prevHuf->repeatMode; + int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; + if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; + cLitSize = singleStream ? + HUF_compress1X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) : + HUF_compress4X_repeat( + ostart+lhSize, dstCapacity-lhSize, src, srcSize, + HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize, + (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); + if (repeat != HUF_repeat_none) { + /* reused the existing table */ + DEBUGLOG(5, "Reusing previous huffman table"); + hType = set_repeat; + } + } + + if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); + } + if (cLitSize==1) { + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); + } + + if (hType == set_compressed) { + /* using a newly constructed table */ + nextHuf->repeatMode = HUF_repeat_check; + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize)); + return lhSize+cLitSize; +} diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h new file mode 100644 index 000000000000..9904c0cd30a0 --- /dev/null +++ b/lib/zstd/compress/zstd_compress_literals.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_LITERALS_H +#define ZSTD_COMPRESS_LITERALS_H + +#include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ + + +size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); + +size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_strategy strategy, int disableLiteralCompression, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + void* entropyWorkspace, size_t entropyWorkspaceSize, + const int bmi2); + +#endif /* ZSTD_COMPRESS_LITERALS_H */ diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c new file mode 100644 index 000000000000..08a5b89019dd --- /dev/null +++ b/lib/zstd/compress/zstd_compress_sequences.c @@ -0,0 +1,439 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_sequences.h" + +/** + * -log2(x / 256) lookup table for x in [0, 256). + * If x == 0: Return 0 + * Else: Return floor(-log2(x / 256) * 256) + */ +static unsigned const kInverseProbabilityLog256[256] = { + 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, + 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, + 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, + 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, + 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, + 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, + 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, + 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, + 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, + 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, + 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, + 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, + 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, + 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, + 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, + 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, + 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, + 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, + 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, + 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, + 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, + 5, 4, 2, 1, +}; + +static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { + void const* ptr = ctable; + U16 const* u16ptr = (U16 const*)ptr; + U32 const maxSymbolValue = MEM_read16(u16ptr + 1); + return maxSymbolValue; +} + +/** + * Returns true if we should use ncount=-1 else we should + * use ncount=1 for low probability symbols instead. + */ +static unsigned ZSTD_useLowProbCount(size_t const nbSeq) +{ + /* Heuristic: This should cover most blocks <= 16K and + * start to fade out after 16K to about 32K depending on + * comprssibility. + */ + return nbSeq >= 2048; +} + +/** + * Returns the cost in bytes of encoding the normalized count header. + * Returns an error if any of the helper functions return an error. + */ +static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, + size_t const nbSeq, unsigned const FSELog) +{ + BYTE wksp[FSE_NCOUNTBOUND]; + S16 norm[MaxSeq + 1]; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max, ZSTD_useLowProbCount(nbSeq)), ""); + return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); +} + +/** + * Returns the cost in bits of encoding the distribution described by count + * using the entropy bound. + */ +static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) +{ + unsigned cost = 0; + unsigned s; + for (s = 0; s <= max; ++s) { + unsigned norm = (unsigned)((256 * count[s]) / total); + if (count[s] != 0 && norm == 0) + norm = 1; + assert(count[s] < total); + cost += count[s] * kInverseProbabilityLog256[norm]; + } + return cost >> 8; +} + +/** + * Returns the cost in bits of encoding the distribution in count using ctable. + * Returns an error if ctable cannot represent all the symbols in count. + */ +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max) +{ + unsigned const kAccuracyLog = 8; + size_t cost = 0; + unsigned s; + FSE_CState_t cstate; + FSE_initCState(&cstate, ctable); + if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { + DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", + ZSTD_getFSEMaxSymbolValue(ctable), max); + return ERROR(GENERIC); + } + for (s = 0; s <= max; ++s) { + unsigned const tableLog = cstate.stateLog; + unsigned const badCost = (tableLog + 1) << kAccuracyLog; + unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); + if (count[s] == 0) + continue; + if (bitCost >= badCost) { + DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); + return ERROR(GENERIC); + } + cost += (size_t)count[s] * bitCost; + } + return cost >> kAccuracyLog; +} + +/** + * Returns the cost in bits of encoding the distribution in count using the + * table described by norm. The max symbol support by norm is assumed >= max. + * norm must be valid for every symbol with non-zero probability in count. + */ +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max) +{ + unsigned const shift = 8 - accuracyLog; + size_t cost = 0; + unsigned s; + assert(accuracyLog <= 8); + for (s = 0; s <= max; ++s) { + unsigned const normAcc = (norm[s] != -1) ? (unsigned)norm[s] : 1; + unsigned const norm256 = normAcc << shift; + assert(norm256 > 0); + assert(norm256 < 256); + cost += count[s] * kInverseProbabilityLog256[norm256]; + } + return cost >> 8; +} + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy) +{ + ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); + if (mostFrequent == nbSeq) { + *repeatMode = FSE_repeat_none; + if (isDefaultAllowed && nbSeq <= 2) { + /* Prefer set_basic over set_rle when there are 2 or less symbols, + * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. + * If basic encoding isn't possible, always choose RLE. + */ + DEBUGLOG(5, "Selected set_basic"); + return set_basic; + } + DEBUGLOG(5, "Selected set_rle"); + return set_rle; + } + if (strategy < ZSTD_lazy) { + if (isDefaultAllowed) { + size_t const staticFse_nbSeq_max = 1000; + size_t const mult = 10 - strategy; + size_t const baseLog = 3; + size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ + assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ + assert(mult <= 9 && mult >= 7); + if ( (*repeatMode == FSE_repeat_valid) + && (nbSeq < staticFse_nbSeq_max) ) { + DEBUGLOG(5, "Selected set_repeat"); + return set_repeat; + } + if ( (nbSeq < dynamicFse_nbSeq_min) + || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { + DEBUGLOG(5, "Selected set_basic"); + /* The format allows default tables to be repeated, but it isn't useful. + * When using simple heuristics to select encoding type, we don't want + * to confuse these tables with dictionaries. When running more careful + * analysis, we don't need to waste time checking both repeating tables + * and default tables. + */ + *repeatMode = FSE_repeat_none; + return set_basic; + } + } + } else { + size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); + size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); + size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); + size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); + + if (isDefaultAllowed) { + assert(!ZSTD_isError(basicCost)); + assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); + } + assert(!ZSTD_isError(NCountCost)); + assert(compressedCost < ERROR(maxCode)); + DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", + (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); + if (basicCost <= repeatCost && basicCost <= compressedCost) { + DEBUGLOG(5, "Selected set_basic"); + assert(isDefaultAllowed); + *repeatMode = FSE_repeat_none; + return set_basic; + } + if (repeatCost <= compressedCost) { + DEBUGLOG(5, "Selected set_repeat"); + assert(!ZSTD_isError(repeatCost)); + return set_repeat; + } + assert(compressedCost < basicCost && compressedCost < repeatCost); + } + DEBUGLOG(5, "Selected set_compressed"); + *repeatMode = FSE_repeat_check; + return set_compressed; +} + +typedef struct { + S16 norm[MaxSeq + 1]; + U32 wksp[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(MaxSeq, MaxFSELog)]; +} ZSTD_BuildCTableWksp; + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize) +{ + BYTE* op = (BYTE*)dst; + const BYTE* const oend = op + dstCapacity; + DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); + + switch (type) { + case set_rle: + FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max), ""); + RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall, "not enough space"); + *op = codeTable[0]; + return 1; + case set_repeat: + ZSTD_memcpy(nextCTable, prevCTable, prevCTableSize); + return 0; + case set_basic: + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, entropyWorkspace, entropyWorkspaceSize), ""); /* note : could be pre-calculated */ + return 0; + case set_compressed: { + ZSTD_BuildCTableWksp* wksp = (ZSTD_BuildCTableWksp*)entropyWorkspace; + size_t nbSeq_1 = nbSeq; + const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); + if (count[codeTable[nbSeq-1]] > 1) { + count[codeTable[nbSeq-1]]--; + nbSeq_1--; + } + assert(nbSeq_1 > 1); + assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp)); + (void)entropyWorkspaceSize; + FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), ""); + { size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog); /* overflow protected */ + FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed"); + FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), ""); + return NCountSize; + } + } + default: assert(0); RETURN_ERROR(GENERIC, "impossible to reach"); + } +} + +FORCE_INLINE_TEMPLATE size_t +ZSTD_encodeSequences_body( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + BIT_CStream_t blockStream; + FSE_CState_t stateMatchLength; + FSE_CState_t stateOffsetBits; + FSE_CState_t stateLitLength; + + RETURN_ERROR_IF( + ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), + dstSize_tooSmall, "not enough space remaining"); + DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", + (int)(blockStream.endPtr - blockStream.startPtr), + (unsigned)dstCapacity); + + /* first symbols */ + FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); + FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); + FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); + BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); + if (MEM_32bits()) BIT_flushBits(&blockStream); + if (longOffsets) { + U32 const ofBits = ofCodeTable[nbSeq-1]; + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); + BIT_flushBits(&blockStream); + } + BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, + ofBits - extraBits); + } else { + BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); + } + BIT_flushBits(&blockStream); + + { size_t n; + for (n=nbSeq-2 ; n= 64-7-(LLFSELog+MLFSELog+OffFSELog))) + BIT_flushBits(&blockStream); /* (7)*/ + BIT_addBits(&blockStream, sequences[n].litLength, llBits); + if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); + BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); + if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); + if (longOffsets) { + unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); + if (extraBits) { + BIT_addBits(&blockStream, sequences[n].offset, extraBits); + BIT_flushBits(&blockStream); /* (7)*/ + } + BIT_addBits(&blockStream, sequences[n].offset >> extraBits, + ofBits - extraBits); /* 31 */ + } else { + BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ + } + BIT_flushBits(&blockStream); /* (7)*/ + DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); + } } + + DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); + FSE_flushCState(&blockStream, &stateMatchLength); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); + FSE_flushCState(&blockStream, &stateOffsetBits); + DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); + FSE_flushCState(&blockStream, &stateLitLength); + + { size_t const streamSize = BIT_closeCStream(&blockStream); + RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); + return streamSize; + } +} + +static size_t +ZSTD_encodeSequences_default( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + + +#if DYNAMIC_BMI2 + +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_encodeSequences_bmi2( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets) +{ + return ZSTD_encodeSequences_body(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} + +#endif + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) +{ + DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); +#if DYNAMIC_BMI2 + if (bmi2) { + return ZSTD_encodeSequences_bmi2(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); + } +#endif + (void)bmi2; + return ZSTD_encodeSequences_default(dst, dstCapacity, + CTable_MatchLength, mlCodeTable, + CTable_OffsetBits, ofCodeTable, + CTable_LitLength, llCodeTable, + sequences, nbSeq, longOffsets); +} diff --git a/lib/zstd/compress/zstd_compress_sequences.h b/lib/zstd/compress/zstd_compress_sequences.h new file mode 100644 index 000000000000..7991364c2f71 --- /dev/null +++ b/lib/zstd/compress/zstd_compress_sequences.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_SEQUENCES_H +#define ZSTD_COMPRESS_SEQUENCES_H + +#include "../common/fse.h" /* FSE_repeat, FSE_CTable */ +#include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ + +typedef enum { + ZSTD_defaultDisallowed = 0, + ZSTD_defaultAllowed = 1 +} ZSTD_defaultPolicy_e; + +symbolEncodingType_e +ZSTD_selectEncodingType( + FSE_repeat* repeatMode, unsigned const* count, unsigned const max, + size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, + FSE_CTable const* prevCTable, + short const* defaultNorm, U32 defaultNormLog, + ZSTD_defaultPolicy_e const isDefaultAllowed, + ZSTD_strategy const strategy); + +size_t +ZSTD_buildCTable(void* dst, size_t dstCapacity, + FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, + unsigned* count, U32 max, + const BYTE* codeTable, size_t nbSeq, + const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, + const FSE_CTable* prevCTable, size_t prevCTableSize, + void* entropyWorkspace, size_t entropyWorkspaceSize); + +size_t ZSTD_encodeSequences( + void* dst, size_t dstCapacity, + FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, + FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, + FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, + seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); + +size_t ZSTD_fseBitCost( + FSE_CTable const* ctable, + unsigned const* count, + unsigned const max); + +size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, + unsigned const* count, unsigned const max); +#endif /* ZSTD_COMPRESS_SEQUENCES_H */ diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c new file mode 100644 index 000000000000..767f73f5bf3d --- /dev/null +++ b/lib/zstd/compress/zstd_compress_superblock.c @@ -0,0 +1,850 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + /*-************************************* + * Dependencies + ***************************************/ +#include "zstd_compress_superblock.h" + +#include "../common/zstd_internal.h" /* ZSTD_getSequenceLength */ +#include "hist.h" /* HIST_countFast_wksp */ +#include "zstd_compress_internal.h" +#include "zstd_compress_sequences.h" +#include "zstd_compress_literals.h" + +/*-************************************* +* Superblock entropy buffer structs +***************************************/ +/** ZSTD_hufCTablesMetadata_t : + * Stores Literals Block Type for a super-block in hType, and + * huffman tree description in hufDesBuffer. + * hufDesSize refers to the size of huffman tree description in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */ +typedef struct { + symbolEncodingType_e hType; + BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE]; + size_t hufDesSize; +} ZSTD_hufCTablesMetadata_t; + +/** ZSTD_fseCTablesMetadata_t : + * Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and + * fse tables in fseTablesBuffer. + * fseTablesSize refers to the size of fse tables in bytes. + * This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */ +typedef struct { + symbolEncodingType_e llType; + symbolEncodingType_e ofType; + symbolEncodingType_e mlType; + BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE]; + size_t fseTablesSize; + size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */ +} ZSTD_fseCTablesMetadata_t; + +typedef struct { + ZSTD_hufCTablesMetadata_t hufMetadata; + ZSTD_fseCTablesMetadata_t fseMetadata; +} ZSTD_entropyCTablesMetadata_t; + + +/** ZSTD_buildSuperBlockEntropy_literal() : + * Builds entropy for the super-block literals. + * Stores literals block type (raw, rle, compressed, repeat) and + * huffman description table to hufMetadata. + * @return : size of huffman description table or error code */ +static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize, + const ZSTD_hufCTables_t* prevHuf, + ZSTD_hufCTables_t* nextHuf, + ZSTD_hufCTablesMetadata_t* hufMetadata, + const int disableLiteralsCompression, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned); + BYTE* const nodeWksp = countWkspStart + countWkspSize; + const size_t nodeWkspSize = wkspEnd-nodeWksp; + unsigned maxSymbolValue = 255; + unsigned huffLog = HUF_TABLELOG_DEFAULT; + HUF_repeat repeat = prevHuf->repeatMode; + + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize); + + /* Prepare nextEntropy assuming reusing the existing table */ + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + + if (disableLiteralsCompression) { + DEBUGLOG(5, "set_basic - disabled"); + hufMetadata->hType = set_basic; + return 0; + } + + /* small ? don't even attempt compression (speed opt) */ +# define COMPRESS_LITERALS_SIZE_MIN 63 + { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; + if (srcSize <= minLitSize) { + DEBUGLOG(5, "set_basic - too small"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Scan input and build symbol stats */ + { size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize); + FORWARD_IF_ERROR(largest, "HIST_count_wksp failed"); + if (largest == srcSize) { + DEBUGLOG(5, "set_rle"); + hufMetadata->hType = set_rle; + return 0; + } + if (largest <= (srcSize >> 7)+4) { + DEBUGLOG(5, "set_basic - no gain"); + hufMetadata->hType = set_basic; + return 0; + } + } + + /* Validate the previous Huffman table */ + if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) { + repeat = HUF_repeat_none; + } + + /* Build Huffman Tree */ + ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable)); + huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); + { size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp, + maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp"); + huffLog = (U32)maxBits; + { /* Build and write the CTable */ + size_t const newCSize = HUF_estimateCompressedSize( + (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue); + size_t const hSize = HUF_writeCTable_wksp( + hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer), + (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog, + nodeWksp, nodeWkspSize); + /* Check against repeating the previous CTable */ + if (repeat != HUF_repeat_none) { + size_t const oldCSize = HUF_estimateCompressedSize( + (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue); + if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) { + DEBUGLOG(5, "set_repeat - smaller"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_repeat; + return 0; + } + } + if (newCSize + hSize >= srcSize) { + DEBUGLOG(5, "set_basic - no gains"); + ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); + hufMetadata->hType = set_basic; + return 0; + } + DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize); + hufMetadata->hType = set_compressed; + nextHuf->repeatMode = HUF_repeat_check; + return hSize; + } + } +} + +/** ZSTD_buildSuperBlockEntropy_sequences() : + * Builds entropy for the super-block sequences. + * Stores symbol compression modes and fse table to fseMetadata. + * @return : size of fse tables or error code */ +static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr, + const ZSTD_fseCTables_t* prevEntropy, + ZSTD_fseCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize) +{ + BYTE* const wkspStart = (BYTE*)workspace; + BYTE* const wkspEnd = wkspStart + wkspSize; + BYTE* const countWkspStart = wkspStart; + unsigned* const countWksp = (unsigned*)workspace; + const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned); + BYTE* const cTableWksp = countWkspStart + countWkspSize; + const size_t cTableWkspSize = wkspEnd-cTableWksp; + ZSTD_strategy const strategy = cctxParams->cParams.strategy; + FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable; + FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable; + FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable; + const BYTE* const ofCodeTable = seqStorePtr->ofCode; + const BYTE* const llCodeTable = seqStorePtr->llCode; + const BYTE* const mlCodeTable = seqStorePtr->mlCode; + size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; + BYTE* const ostart = fseMetadata->fseTablesBuffer; + BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer); + BYTE* op = ostart; + + assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE)); + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq); + ZSTD_memset(workspace, 0, wkspSize); + + fseMetadata->lastCountSize = 0; + /* convert length/distances into codes */ + ZSTD_seqToCodes(seqStorePtr); + /* build CTable for Literal Lengths */ + { U32 LLtype; + unsigned max = MaxLL; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building LL table"); + nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode; + LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + LLFSELog, prevEntropy->litlengthCTable, + LL_defaultNorm, LL_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(set_basic < set_compressed && set_rle < set_compressed); + assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, + countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, + prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed"); + if (LLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->llType = (symbolEncodingType_e) LLtype; + } } + /* build CTable for Offsets */ + { U32 Offtype; + unsigned max = MaxOff; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */ + ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed; + DEBUGLOG(5, "Building OF table"); + nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode; + Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, + countWksp, max, mostFrequent, nbSeq, + OffFSELog, prevEntropy->offcodeCTable, + OF_defaultNorm, OF_defaultNormLog, + defaultPolicy, strategy); + assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, + countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed"); + if (Offtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->ofType = (symbolEncodingType_e) Offtype; + } } + /* build CTable for MatchLengths */ + { U32 MLtype; + unsigned max = MaxML; + size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize); /* can't fail */ + DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op)); + nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode; + MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, + countWksp, max, mostFrequent, nbSeq, + MLFSELog, prevEntropy->matchlengthCTable, + ML_defaultNorm, ML_defaultNormLog, + ZSTD_defaultAllowed, strategy); + assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ + { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, + countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, + prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable), + cTableWksp, cTableWkspSize); + FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed"); + if (MLtype == set_compressed) + fseMetadata->lastCountSize = countSize; + op += countSize; + fseMetadata->mlType = (symbolEncodingType_e) MLtype; + } } + assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer)); + return op-ostart; +} + + +/** ZSTD_buildSuperBlockEntropy() : + * Builds entropy for the super-block. + * @return : 0 on success or error code */ +static size_t +ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr, + const ZSTD_entropyCTables_t* prevEntropy, + ZSTD_entropyCTables_t* nextEntropy, + const ZSTD_CCtx_params* cctxParams, + ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize) +{ + size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart; + DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy"); + entropyMetadata->hufMetadata.hufDesSize = + ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize, + &prevEntropy->huf, &nextEntropy->huf, + &entropyMetadata->hufMetadata, + ZSTD_disableLiteralsCompression(cctxParams), + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed"); + entropyMetadata->fseMetadata.fseTablesSize = + ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr, + &prevEntropy->fse, &nextEntropy->fse, + cctxParams, + &entropyMetadata->fseMetadata, + workspace, wkspSize); + FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed"); + return 0; +} + +/** ZSTD_compressSubBlock_literal() : + * Compresses literals section for a sub-block. + * When we have to write the Huffman table we will sometimes choose a header + * size larger than necessary. This is because we have to pick the header size + * before we know the table size + compressed size, so we have a bound on the + * table size. If we guessed incorrectly, we fall back to uncompressed literals. + * + * We write the header when writeEntropy=1 and set entropyWritten=1 when we succeeded + * in writing the header, otherwise it is set to 0. + * + * hufMetadata->hType has literals block type info. + * If it is set_basic, all sub-blocks literals section will be Raw_Literals_Block. + * If it is set_rle, all sub-blocks literals section will be RLE_Literals_Block. + * If it is set_compressed, first sub-block's literals section will be Compressed_Literals_Block + * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block + * and the following sub-blocks' literals sections will be Treeless_Literals_Block. + * @return : compressed size of literals section of a sub-block + * Or 0 if it unable to compress. + * Or error code */ +static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + const BYTE* literals, size_t litSize, + void* dst, size_t dstSize, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + size_t const header = writeEntropy ? 200 : 0; + size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header)); + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstSize; + BYTE* op = ostart + lhSize; + U32 const singleStream = lhSize == 3; + symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat; + size_t cLitSize = 0; + + (void)bmi2; /* TODO bmi2... */ + + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy); + + *entropyWritten = 0; + if (litSize == 0 || hufMetadata->hType == set_basic) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } else if (hufMetadata->hType == set_rle) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using rle literal"); + return ZSTD_compressRleLiteralsBlock(dst, dstSize, literals, litSize); + } + + assert(litSize > 0); + assert(hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat); + + if (writeEntropy && hufMetadata->hType == set_compressed) { + ZSTD_memcpy(op, hufMetadata->hufDesBuffer, hufMetadata->hufDesSize); + op += hufMetadata->hufDesSize; + cLitSize += hufMetadata->hufDesSize; + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize); + } + + /* TODO bmi2 */ + { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable) + : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable); + op += cSize; + cLitSize += cSize; + if (cSize == 0 || ERR_isError(cSize)) { + DEBUGLOG(5, "Failed to write entropy tables %s", ZSTD_getErrorName(cSize)); + return 0; + } + /* If we expand and we aren't writing a header then emit uncompressed */ + if (!writeEntropy && cLitSize >= litSize) { + DEBUGLOG(5, "ZSTD_compressSubBlock_literal using raw literal because uncompressible"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + /* If we are writing headers then allow expansion that doesn't change our header size. */ + if (lhSize < (size_t)(3 + (cLitSize >= 1 KB) + (cLitSize >= 16 KB))) { + assert(cLitSize > litSize); + DEBUGLOG(5, "Literals expanded beyond allowed header size"); + return ZSTD_noCompressLiterals(dst, dstSize, literals, litSize); + } + DEBUGLOG(5, "ZSTD_compressSubBlock_literal (cSize=%zu)", cSize); + } + + /* Build header */ + switch(lhSize) + { + case 3: /* 2 - 2 - 10 - 10 */ + { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14); + MEM_writeLE24(ostart, lhc); + break; + } + case 4: /* 2 - 2 - 14 - 14 */ + { U32 const lhc = hType + (2 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<18); + MEM_writeLE32(ostart, lhc); + break; + } + case 5: /* 2 - 2 - 18 - 18 */ + { U32 const lhc = hType + (3 << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<22); + MEM_writeLE32(ostart, lhc); + ostart[4] = (BYTE)(cLitSize >> 10); + break; + } + default: /* not possible : lhSize is {3,4,5} */ + assert(0); + } + *entropyWritten = 1; + DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart)); + return op-ostart; +} + +static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) { + const seqDef* const sstart = sequences; + const seqDef* const send = sequences + nbSeq; + const seqDef* sp = sstart; + size_t matchLengthSum = 0; + size_t litLengthSum = 0; + while (send-sp > 0) { + ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp); + litLengthSum += seqLen.litLength; + matchLengthSum += seqLen.matchLength; + sp++; + } + assert(litLengthSum <= litSize); + if (!lastSequence) { + assert(litLengthSum == litSize); + } + return matchLengthSum + litSize; +} + +/** ZSTD_compressSubBlock_sequences() : + * Compresses sequences section for a sub-block. + * fseMetadata->llType, fseMetadata->ofType, and fseMetadata->mlType have + * symbol compression modes for the super-block. + * The first successfully compressed block will have these in its header. + * We set entropyWritten=1 when we succeed in compressing the sequences. + * The following sub-blocks will always have repeat mode. + * @return : compressed size of sequences section of a sub-block + * Or 0 if it is unable to compress + * Or error code. */ +static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, int writeEntropy, int* entropyWritten) +{ + const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + BYTE* seqHead; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (nbSeq=%zu, writeEntropy=%d, longOffsets=%d)", nbSeq, writeEntropy, longOffsets); + + *entropyWritten = 0; + /* Sequences Header */ + RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, + dstSize_tooSmall, ""); + if (nbSeq < 0x7F) + *op++ = (BYTE)nbSeq; + else if (nbSeq < LONGNBSEQ) + op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; + else + op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; + if (nbSeq==0) { + return op - ostart; + } + + /* seqHead : flags for FSE encoding type */ + seqHead = op++; + + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (seqHeadSize=%u)", (unsigned)(op-ostart)); + + if (writeEntropy) { + const U32 LLtype = fseMetadata->llType; + const U32 Offtype = fseMetadata->ofType; + const U32 MLtype = fseMetadata->mlType; + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (fseTablesSize=%zu)", fseMetadata->fseTablesSize); + *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); + ZSTD_memcpy(op, fseMetadata->fseTablesBuffer, fseMetadata->fseTablesSize); + op += fseMetadata->fseTablesSize; + } else { + const U32 repeat = set_repeat; + *seqHead = (BYTE)((repeat<<6) + (repeat<<4) + (repeat<<2)); + } + + { size_t const bitstreamSize = ZSTD_encodeSequences( + op, oend - op, + fseTables->matchlengthCTable, mlCode, + fseTables->offcodeCTable, ofCode, + fseTables->litlengthCTable, llCode, + sequences, nbSeq, + longOffsets, bmi2); + FORWARD_IF_ERROR(bitstreamSize, "ZSTD_encodeSequences failed"); + op += bitstreamSize; + /* zstd versions <= 1.3.4 mistakenly report corruption when + * FSE_readNCount() receives a buffer < 4 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1146. + * This can happen when the last set_compressed table present is 2 + * bytes and the bitstream is only one byte. + * In this exceedingly rare case, we will simply emit an uncompressed + * block, since it isn't worth optimizing. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (writeEntropy && fseMetadata->lastCountSize && fseMetadata->lastCountSize + bitstreamSize < 4) { + /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */ + assert(fseMetadata->lastCountSize + bitstreamSize == 3); + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by " + "emitting an uncompressed block."); + return 0; + } +#endif + DEBUGLOG(5, "ZSTD_compressSubBlock_sequences (bitstreamSize=%zu)", bitstreamSize); + } + + /* zstd versions <= 1.4.0 mistakenly report error when + * sequences section body size is less than 3 bytes. + * Fixed by https://github.com/facebook/zstd/pull/1664. + * This can happen when the previous sequences section block is compressed + * with rle mode and the current block's sequences section is compressed + * with repeat mode where sequences section body size can be 1 byte. + */ +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + if (op-seqHead < 4) { + DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.4.0 by emitting " + "an uncompressed block when sequences are < 4 bytes"); + return 0; + } +#endif + + *entropyWritten = 1; + return op - ostart; +} + +/** ZSTD_compressSubBlock() : + * Compresses a single sub-block. + * @return : compressed size of the sub-block + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const seqDef* sequences, size_t nbSeq, + const BYTE* literals, size_t litSize, + const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const int bmi2, + int writeLitEntropy, int writeSeqEntropy, + int* litEntropyWritten, int* seqEntropyWritten, + U32 lastBlock) +{ + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart + ZSTD_blockHeaderSize; + DEBUGLOG(5, "ZSTD_compressSubBlock (litSize=%zu, nbSeq=%zu, writeLitEntropy=%d, writeSeqEntropy=%d, lastBlock=%d)", + litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock); + { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable, + &entropyMetadata->hufMetadata, literals, litSize, + op, oend-op, bmi2, writeLitEntropy, litEntropyWritten); + FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed"); + if (cLitSize == 0) return 0; + op += cLitSize; + } + { size_t cSeqSize = ZSTD_compressSubBlock_sequences(&entropy->fse, + &entropyMetadata->fseMetadata, + sequences, nbSeq, + llCode, mlCode, ofCode, + cctxParams, + op, oend-op, + bmi2, writeSeqEntropy, seqEntropyWritten); + FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed"); + if (cSeqSize == 0) return 0; + op += cSeqSize; + } + /* Write block header */ + { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize; + U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); + MEM_writeLE24(ostart, cBlockHeader24); + } + return op-ostart; +} + +static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize, + const ZSTD_hufCTables_t* huf, + const ZSTD_hufCTablesMetadata_t* hufMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + unsigned* const countWksp = (unsigned*)workspace; + unsigned maxSymbolValue = 255; + size_t literalSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + + if (hufMetadata->hType == set_basic) return litSize; + else if (hufMetadata->hType == set_rle) return 1; + else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) { + size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize); + if (ZSTD_isError(largest)) return litSize; + { size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue); + if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize; + return cLitSizeEstimate + literalSectionHeaderSize; + } } + assert(0); /* impossible */ + return 0; +} + +static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type, + const BYTE* codeTable, unsigned maxCode, + size_t nbSeq, const FSE_CTable* fseCTable, + const U32* additionalBits, + short const* defaultNorm, U32 defaultNormLog, U32 defaultMax, + void* workspace, size_t wkspSize) +{ + unsigned* const countWksp = (unsigned*)workspace; + const BYTE* ctp = codeTable; + const BYTE* const ctStart = ctp; + const BYTE* const ctEnd = ctStart + nbSeq; + size_t cSymbolTypeSizeEstimateInBits = 0; + unsigned max = maxCode; + + HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize); /* can't fail */ + if (type == set_basic) { + /* We selected this encoding type, so it must be valid. */ + assert(max <= defaultMax); + cSymbolTypeSizeEstimateInBits = max <= defaultMax + ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max) + : ERROR(GENERIC); + } else if (type == set_rle) { + cSymbolTypeSizeEstimateInBits = 0; + } else if (type == set_compressed || type == set_repeat) { + cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max); + } + if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) return nbSeq * 10; + while (ctp < ctEnd) { + if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp]; + else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */ + ctp++; + } + return cSymbolTypeSizeEstimateInBits / 8; +} + +static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_fseCTables_t* fseTables, + const ZSTD_fseCTablesMetadata_t* fseMetadata, + void* workspace, size_t wkspSize, + int writeEntropy) +{ + size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */ + size_t cSeqSizeEstimate = 0; + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff, + nbSeq, fseTables->offcodeCTable, NULL, + OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->llType, llCodeTable, MaxLL, + nbSeq, fseTables->litlengthCTable, LL_bits, + LL_defaultNorm, LL_defaultNormLog, MaxLL, + workspace, wkspSize); + cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, MaxML, + nbSeq, fseTables->matchlengthCTable, ML_bits, + ML_defaultNorm, ML_defaultNormLog, MaxML, + workspace, wkspSize); + if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize; + return cSeqSizeEstimate + sequencesSectionHeaderSize; +} + +static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, + const BYTE* ofCodeTable, + const BYTE* llCodeTable, + const BYTE* mlCodeTable, + size_t nbSeq, + const ZSTD_entropyCTables_t* entropy, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + void* workspace, size_t wkspSize, + int writeLitEntropy, int writeSeqEntropy) { + size_t cSizeEstimate = 0; + cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, + workspace, wkspSize, writeSeqEntropy); + return cSizeEstimate + ZSTD_blockHeaderSize; +} + +static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) +{ + if (fseMetadata->llType == set_compressed || fseMetadata->llType == set_rle) + return 1; + if (fseMetadata->mlType == set_compressed || fseMetadata->mlType == set_rle) + return 1; + if (fseMetadata->ofType == set_compressed || fseMetadata->ofType == set_rle) + return 1; + return 0; +} + +/** ZSTD_compressSubBlock_multi() : + * Breaks super-block into multiple sub-blocks and compresses them. + * Entropy will be written to the first block. + * The following blocks will use repeat mode to compress. + * All sub-blocks are compressed blocks (no raw or rle blocks). + * @return : compressed size of the super block (which is multiple ZSTD blocks) + * Or 0 if it failed to compress. */ +static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, + const ZSTD_compressedBlockState_t* prevCBlock, + ZSTD_compressedBlockState_t* nextCBlock, + const ZSTD_entropyCTablesMetadata_t* entropyMetadata, + const ZSTD_CCtx_params* cctxParams, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const int bmi2, U32 lastBlock, + void* workspace, size_t wkspSize) +{ + const seqDef* const sstart = seqStorePtr->sequencesStart; + const seqDef* const send = seqStorePtr->sequences; + const seqDef* sp = sstart; + const BYTE* const lstart = seqStorePtr->litStart; + const BYTE* const lend = seqStorePtr->lit; + const BYTE* lp = lstart; + BYTE const* ip = (BYTE const*)src; + BYTE const* const iend = ip + srcSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + dstCapacity; + BYTE* op = ostart; + const BYTE* llCodePtr = seqStorePtr->llCode; + const BYTE* mlCodePtr = seqStorePtr->mlCode; + const BYTE* ofCodePtr = seqStorePtr->ofCode; + size_t targetCBlockSize = cctxParams->targetCBlockSize; + size_t litSize, seqCount; + int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed; + int writeSeqEntropy = 1; + int lastSequence = 0; + + DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)", + (unsigned)(lend-lp), (unsigned)(send-sstart)); + + litSize = 0; + seqCount = 0; + do { + size_t cBlockSizeEstimate = 0; + if (sstart == send) { + lastSequence = 1; + } else { + const seqDef* const sequence = sp + seqCount; + lastSequence = sequence == send - 1; + litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength; + seqCount++; + } + if (lastSequence) { + assert(lp <= lend); + assert(litSize <= (size_t)(lend - lp)); + litSize = (size_t)(lend - lp); + } + /* I think there is an optimization opportunity here. + * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful + * since it recalculates estimate from scratch. + * For example, it would recount literal distribution and symbol codes everytime. + */ + cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount, + &nextCBlock->entropy, entropyMetadata, + workspace, wkspSize, writeLitEntropy, writeSeqEntropy); + if (cBlockSizeEstimate > targetCBlockSize || lastSequence) { + int litEntropyWritten = 0; + int seqEntropyWritten = 0; + const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence); + const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, oend-op, + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock && lastSequence); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed the sub-block"); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + sp += seqCount; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + litSize = 0; + seqCount = 0; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + } + } + } while (!lastSequence); + if (writeLitEntropy) { + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten"); + ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); + } + if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) { + /* If we haven't written our entropy tables, then we've violated our contract and + * must emit an uncompressed block. + */ + DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten"); + return 0; + } + if (ip < iend) { + size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip)); + FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); + assert(cSize != 0); + op += cSize; + /* We have to regenerate the repcodes because we've skipped some sequences */ + if (sp < send) { + seqDef const* seq; + repcodes_t rep; + ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep)); + for (seq = sstart; seq < sp; ++seq) { + rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0); + } + ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep)); + } + } + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed"); + return op-ostart; +} + +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock) { + ZSTD_entropyCTablesMetadata_t entropyMetadata; + + FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore, + &zc->blockState.prevCBlock->entropy, + &zc->blockState.nextCBlock->entropy, + &zc->appliedParams, + &entropyMetadata, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), ""); + + return ZSTD_compressSubBlock_multi(&zc->seqStore, + zc->blockState.prevCBlock, + zc->blockState.nextCBlock, + &entropyMetadata, + &zc->appliedParams, + dst, dstCapacity, + src, srcSize, + zc->bmi2, lastBlock, + zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */); +} diff --git a/lib/zstd/compress/zstd_compress_superblock.h b/lib/zstd/compress/zstd_compress_superblock.h new file mode 100644 index 000000000000..224ece79546e --- /dev/null +++ b/lib/zstd/compress/zstd_compress_superblock.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_COMPRESS_ADVANCED_H +#define ZSTD_COMPRESS_ADVANCED_H + +/*-************************************* +* Dependencies +***************************************/ + +#include /* ZSTD_CCtx */ + +/*-************************************* +* Target Compressed Block Size +***************************************/ + +/* ZSTD_compressSuperBlock() : + * Used to compress a super block when targetCBlockSize is being used. + * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */ +size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc, + void* dst, size_t dstCapacity, + void const* src, size_t srcSize, + unsigned lastBlock); + +#endif /* ZSTD_COMPRESS_ADVANCED_H */ diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h new file mode 100644 index 000000000000..c231cc500ef5 --- /dev/null +++ b/lib/zstd/compress/zstd_cwksp.h @@ -0,0 +1,482 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_CWKSP_H +#define ZSTD_CWKSP_H + +/*-************************************* +* Dependencies +***************************************/ +#include "../common/zstd_internal.h" + + +/*-************************************* +* Constants +***************************************/ + +/* Since the workspace is effectively its own little malloc implementation / + * arena, when we run under ASAN, we should similarly insert redzones between + * each internal element of the workspace, so ASAN will catch overruns that + * reach outside an object but that stay inside the workspace. + * + * This defines the size of that redzone. + */ +#ifndef ZSTD_CWKSP_ASAN_REDZONE_SIZE +#define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128 +#endif + +/*-************************************* +* Structures +***************************************/ +typedef enum { + ZSTD_cwksp_alloc_objects, + ZSTD_cwksp_alloc_buffers, + ZSTD_cwksp_alloc_aligned +} ZSTD_cwksp_alloc_phase_e; + +/** + * Used to describe whether the workspace is statically allocated (and will not + * necessarily ever be freed), or if it's dynamically allocated and we can + * expect a well-formed caller to free this. + */ +typedef enum { + ZSTD_cwksp_dynamic_alloc, + ZSTD_cwksp_static_alloc +} ZSTD_cwksp_static_alloc_e; + +/** + * Zstd fits all its internal datastructures into a single continuous buffer, + * so that it only needs to perform a single OS allocation (or so that a buffer + * can be provided to it and it can perform no allocations at all). This buffer + * is called the workspace. + * + * Several optimizations complicate that process of allocating memory ranges + * from this workspace for each internal datastructure: + * + * - These different internal datastructures have different setup requirements: + * + * - The static objects need to be cleared once and can then be trivially + * reused for each compression. + * + * - Various buffers don't need to be initialized at all--they are always + * written into before they're read. + * + * - The matchstate tables have a unique requirement that they don't need + * their memory to be totally cleared, but they do need the memory to have + * some bound, i.e., a guarantee that all values in the memory they've been + * allocated is less than some maximum value (which is the starting value + * for the indices that they will then use for compression). When this + * guarantee is provided to them, they can use the memory without any setup + * work. When it can't, they have to clear the area. + * + * - These buffers also have different alignment requirements. + * + * - We would like to reuse the objects in the workspace for multiple + * compressions without having to perform any expensive reallocation or + * reinitialization work. + * + * - We would like to be able to efficiently reuse the workspace across + * multiple compressions **even when the compression parameters change** and + * we need to resize some of the objects (where possible). + * + * To attempt to manage this buffer, given these constraints, the ZSTD_cwksp + * abstraction was created. It works as follows: + * + * Workspace Layout: + * + * [ ... workspace ... ] + * [objects][tables ... ->] free space [<- ... aligned][<- ... buffers] + * + * The various objects that live in the workspace are divided into the + * following categories, and are allocated separately: + * + * - Static objects: this is optionally the enclosing ZSTD_CCtx or ZSTD_CDict, + * so that literally everything fits in a single buffer. Note: if present, + * this must be the first object in the workspace, since ZSTD_customFree{CCtx, + * CDict}() rely on a pointer comparison to see whether one or two frees are + * required. + * + * - Fixed size objects: these are fixed-size, fixed-count objects that are + * nonetheless "dynamically" allocated in the workspace so that we can + * control how they're initialized separately from the broader ZSTD_CCtx. + * Examples: + * - Entropy Workspace + * - 2 x ZSTD_compressedBlockState_t + * - CDict dictionary contents + * + * - Tables: these are any of several different datastructures (hash tables, + * chain tables, binary trees) that all respect a common format: they are + * uint32_t arrays, all of whose values are between 0 and (nextSrc - base). + * Their sizes depend on the cparams. + * + * - Aligned: these buffers are used for various purposes that require 4 byte + * alignment, but don't require any initialization before they're used. + * + * - Buffers: these buffers are used for various purposes that don't require + * any alignment or initialization before they're used. This means they can + * be moved around at no cost for a new compression. + * + * Allocating Memory: + * + * The various types of objects must be allocated in order, so they can be + * correctly packed into the workspace buffer. That order is: + * + * 1. Objects + * 2. Buffers + * 3. Aligned + * 4. Tables + * + * Attempts to reserve objects of different types out of order will fail. + */ +typedef struct { + void* workspace; + void* workspaceEnd; + + void* objectEnd; + void* tableEnd; + void* tableValidEnd; + void* allocStart; + + BYTE allocFailed; + int workspaceOversizedDuration; + ZSTD_cwksp_alloc_phase_e phase; + ZSTD_cwksp_static_alloc_e isStatic; +} ZSTD_cwksp; + +/*-************************************* +* Functions +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws); + +MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) { + (void)ws; + assert(ws->workspace <= ws->objectEnd); + assert(ws->objectEnd <= ws->tableEnd); + assert(ws->objectEnd <= ws->tableValidEnd); + assert(ws->tableEnd <= ws->allocStart); + assert(ws->tableValidEnd <= ws->allocStart); + assert(ws->allocStart <= ws->workspaceEnd); +} + +/** + * Align must be a power of 2. + */ +MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) { + size_t const mask = align - 1; + assert((align & mask) == 0); + return (size + mask) & ~mask; +} + +/** + * Use this to determine how much space in the workspace we will consume to + * allocate this object. (Normally it should be exactly the size of the object, + * but under special conditions, like ASAN, where we pad each object, it might + * be larger.) + * + * Since tables aren't currently redzoned, you don't need to call through this + * to figure out how much space you need for the matchState tables. Everything + * else is though. + */ +MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) { + if (size == 0) + return 0; + return size; +} + +MEM_STATIC void ZSTD_cwksp_internal_advance_phase( + ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) { + assert(phase >= ws->phase); + if (phase > ws->phase) { + if (ws->phase < ZSTD_cwksp_alloc_buffers && + phase >= ZSTD_cwksp_alloc_buffers) { + ws->tableValidEnd = ws->objectEnd; + } + if (ws->phase < ZSTD_cwksp_alloc_aligned && + phase >= ZSTD_cwksp_alloc_aligned) { + /* If unaligned allocations down from a too-large top have left us + * unaligned, we need to realign our alloc ptr. Technically, this + * can consume space that is unaccounted for in the neededSpace + * calculation. However, I believe this can only happen when the + * workspace is too large, and specifically when it is too large + * by a larger margin than the space that will be consumed. */ + /* TODO: cleaner, compiler warning friendly way to do this??? */ + ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1)); + if (ws->allocStart < ws->tableValidEnd) { + ws->tableValidEnd = ws->allocStart; + } + } + ws->phase = phase; + } +} + +/** + * Returns whether this object/buffer/etc was allocated in this workspace. + */ +MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) { + return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd); +} + +/** + * Internal function. Do not use directly. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_internal( + ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) { + void* alloc; + void* bottom = ws->tableEnd; + ZSTD_cwksp_internal_advance_phase(ws, phase); + alloc = (BYTE *)ws->allocStart - bytes; + + if (bytes == 0) + return NULL; + + + DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(alloc >= bottom); + if (alloc < bottom) { + DEBUGLOG(4, "cwksp: alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + if (alloc < ws->tableValidEnd) { + ws->tableValidEnd = alloc; + } + ws->allocStart = alloc; + + + return alloc; +} + +/** + * Reserves and returns unaligned memory. + */ +MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) { + return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers); +} + +/** + * Reserves and returns memory sized on and aligned on sizeof(unsigned). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) { + assert((bytes & (sizeof(U32)-1)) == 0); + return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned); +} + +/** + * Aligned on sizeof(unsigned). These buffers have the special property that + * their values remain constrained, allowing us to re-use them without + * memset()-ing them. + */ +MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) { + const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned; + void* alloc = ws->tableEnd; + void* end = (BYTE *)alloc + bytes; + void* top = ws->allocStart; + + DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining", + alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes); + assert((bytes & (sizeof(U32)-1)) == 0); + ZSTD_cwksp_internal_advance_phase(ws, phase); + ZSTD_cwksp_assert_internal_consistency(ws); + assert(end <= top); + if (end > top) { + DEBUGLOG(4, "cwksp: table alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->tableEnd = end; + + + return alloc; +} + +/** + * Aligned on sizeof(void*). + */ +MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) { + size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*)); + void* alloc = ws->objectEnd; + void* end = (BYTE*)alloc + roundedBytes; + + + DEBUGLOG(5, + "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining", + alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes); + assert(((size_t)alloc & (sizeof(void*)-1)) == 0); + assert((bytes & (sizeof(void*)-1)) == 0); + ZSTD_cwksp_assert_internal_consistency(ws); + /* we must be in the first phase, no advance is possible */ + if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) { + DEBUGLOG(4, "cwksp: object alloc failed!"); + ws->allocFailed = 1; + return NULL; + } + ws->objectEnd = end; + ws->tableEnd = end; + ws->tableValidEnd = end; + + + return alloc; +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty"); + + + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + ws->tableValidEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC void ZSTD_cwksp_mark_tables_clean(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_clean"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ws->tableValidEnd = ws->tableEnd; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Zero the part of the allocated tables not already marked clean. + */ +MEM_STATIC void ZSTD_cwksp_clean_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: ZSTD_cwksp_clean_tables"); + assert(ws->tableValidEnd >= ws->objectEnd); + assert(ws->tableValidEnd <= ws->allocStart); + if (ws->tableValidEnd < ws->tableEnd) { + ZSTD_memset(ws->tableValidEnd, 0, (BYTE*)ws->tableEnd - (BYTE*)ws->tableValidEnd); + } + ZSTD_cwksp_mark_tables_clean(ws); +} + +/** + * Invalidates table allocations. + * All other allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear_tables(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing tables!"); + + + ws->tableEnd = ws->objectEnd; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * Invalidates all buffer, aligned, and table allocations. + * Object allocations remain valid. + */ +MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) { + DEBUGLOG(4, "cwksp: clearing!"); + + + + ws->tableEnd = ws->objectEnd; + ws->allocStart = ws->workspaceEnd; + ws->allocFailed = 0; + if (ws->phase > ZSTD_cwksp_alloc_buffers) { + ws->phase = ZSTD_cwksp_alloc_buffers; + } + ZSTD_cwksp_assert_internal_consistency(ws); +} + +/** + * The provided workspace takes ownership of the buffer [start, start+size). + * Any existing values in the workspace are ignored (the previously managed + * buffer, if present, must be separately freed). + */ +MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size, ZSTD_cwksp_static_alloc_e isStatic) { + DEBUGLOG(4, "cwksp: init'ing workspace with %zd bytes", size); + assert(((size_t)start & (sizeof(void*)-1)) == 0); /* ensure correct alignment */ + ws->workspace = start; + ws->workspaceEnd = (BYTE*)start + size; + ws->objectEnd = ws->workspace; + ws->tableValidEnd = ws->objectEnd; + ws->phase = ZSTD_cwksp_alloc_objects; + ws->isStatic = isStatic; + ZSTD_cwksp_clear(ws); + ws->workspaceOversizedDuration = 0; + ZSTD_cwksp_assert_internal_consistency(ws); +} + +MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) { + void* workspace = ZSTD_customMalloc(size, customMem); + DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size); + RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!"); + ZSTD_cwksp_init(ws, workspace, size, ZSTD_cwksp_dynamic_alloc); + return 0; +} + +MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) { + void *ptr = ws->workspace; + DEBUGLOG(4, "cwksp: freeing workspace"); + ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp)); + ZSTD_customFree(ptr, customMem); +} + +/** + * Moves the management of a workspace from one cwksp to another. The src cwksp + * is left in an invalid state (src must be re-init()'ed before it's used again). + */ +MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) { + *dst = *src; + ZSTD_memset(src, 0, sizeof(ZSTD_cwksp)); +} + +MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace); +} + +MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace) + + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart); +} + +MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) { + return ws->allocFailed; +} + +/*-************************************* +* Functions Checking Free Space +***************************************/ + +MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) { + return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd); +} + +MEM_STATIC int ZSTD_cwksp_check_available(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_available_space(ws) >= additionalNeededSpace; +} + +MEM_STATIC int ZSTD_cwksp_check_too_large(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_available( + ws, additionalNeededSpace * ZSTD_WORKSPACETOOLARGE_FACTOR); +} + +MEM_STATIC int ZSTD_cwksp_check_wasteful(ZSTD_cwksp* ws, size_t additionalNeededSpace) { + return ZSTD_cwksp_check_too_large(ws, additionalNeededSpace) + && ws->workspaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +MEM_STATIC void ZSTD_cwksp_bump_oversized_duration( + ZSTD_cwksp* ws, size_t additionalNeededSpace) { + if (ZSTD_cwksp_check_too_large(ws, additionalNeededSpace)) { + ws->workspaceOversizedDuration++; + } else { + ws->workspaceOversizedDuration = 0; + } +} + + +#endif /* ZSTD_CWKSP_H */ diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c new file mode 100644 index 000000000000..b99172e9d2e4 --- /dev/null +++ b/lib/zstd/compress/zstd_double_fast.c @@ -0,0 +1,521 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_double_fast.h" + + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashLarge = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32 const mls = cParams->minMatch; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash tables. + * Insert the other positions into the large hash table if their entry + * is empty. + */ + for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + U32 i; + for (i = 0; i < fastHashFillStep; ++i) { + size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls); + size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8); + if (i == 0) + hashSmall[smHash] = curr + i; + if (i == 0 || hashLarge[lgHash] == 0) + hashLarge[lgHash] = curr + i; + /* Only load extra positions for ZSTD_dtlm_full */ + if (dtlm == ZSTD_dtlm_fast) + break; + } } +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_doubleFast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */, ZSTD_dictMode_e const dictMode) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + const U32 hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + const U32 hBitsS = cParams->chainLog; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + /* presumes that, if there is a dictionary, it must be using Attach mode */ + const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixLowest = base + prefixLowestIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = + dictMode == ZSTD_dictMatchState ? + &dms->cParams : NULL; + const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ? + dms->hashTable : NULL; + const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ? + dms->chainTable : NULL; + const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? + dms->window.dictLimit : 0; + const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? + dms->window.base : NULL; + const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? + dictBase + dictStartIndex : NULL; + const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? + dms->window.nextSrc : NULL; + const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ? + dictCParams->hashLog : hBitsL; + const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ? + dictCParams->chainLog : hBitsS; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart)); + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); + + assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); + + /* if a dictionary is attached, it must be within window range */ + if (dictMode == ZSTD_dictMatchState) { + assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex); + } + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + if (dictMode == ZSTD_dictMatchState) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + U32 offset; + size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8); + size_t const h = ZSTD_hashPtr(ip, hBitsS, mls); + size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8); + size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndexL = hashLong[h2]; + U32 matchIndexS = hashSmall[h]; + const BYTE* matchLong = base + matchIndexL; + const BYTE* match = base + matchIndexS; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (dictMode == ZSTD_dictMatchState + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashLong[h2] = hashSmall[h] = curr; /* update hash tables */ + + /* check dictMatchState repcode */ + if (dictMode == ZSTD_dictMatchState + && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + /* check noDict repcode */ + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + goto _match_stored; + } + + if (matchIndexL > prefixLowestIndex) { + /* check prefix long match */ + if (MEM_read64(matchLong) == MEM_read64(ip)) { + mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8; + offset = (U32)(ip-matchLong); + while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState long match */ + U32 const dictMatchIndexL = dictHashLong[dictHL]; + const BYTE* dictMatchL = dictBase + dictMatchIndexL; + assert(dictMatchL < dictEnd); + + if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) { + mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8; + offset = (U32)(curr - dictMatchIndexL - dictIndexDelta); + while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ + goto _match_found; + } } + + if (matchIndexS > prefixLowestIndex) { + /* check prefix short match */ + if (MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dictMatchState short match */ + U32 const dictMatchIndexS = dictHashSmall[dictHS]; + match = dictBase + dictMatchIndexS; + matchIndexS = dictMatchIndexS + dictIndexDelta; + + if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { + goto _search_next_long; + } } + + ip += ((ip-anchor) >> kSearchStrength) + 1; +#if defined(__aarch64__) + PREFETCH_L1(ip+256); +#endif + continue; + +_search_next_long: + + { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); + U32 const matchIndexL3 = hashLong[hl3]; + const BYTE* matchL3 = base + matchIndexL3; + hashLong[hl3] = curr + 1; + + /* check prefix long +1 match */ + if (matchIndexL3 > prefixLowestIndex) { + if (MEM_read64(matchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8; + ip++; + offset = (U32)(ip-matchL3); + while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */ + goto _match_found; + } + } else if (dictMode == ZSTD_dictMatchState) { + /* check dict long +1 match */ + U32 const dictMatchIndexL3 = dictHashLong[dictHLNext]; + const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3; + assert(dictMatchL3 < dictEnd); + if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) { + mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8; + ip++; + offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta); + while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ + goto _match_found; + } } } + + /* if no long +1 match, explore the short match we found */ + if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { + mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4; + offset = (U32)(curr - matchIndexS); + while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } else { + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + offset = (U32)(ip - match); + while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + + /* fall-through */ + +_match_found: + offset_2 = offset_1; + offset_1 = offset; + + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + +_match_stored: + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + if (dictMode == ZSTD_dictMatchState) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState + && repIndex2 < prefixLowestIndex ? + dictBase + repIndex2 - dictIndexDelta : + base + repIndex2; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } + + if (dictMode == ZSTD_noDict) { + while ( (ip <= ilimit) + && ( (offset_2>0) + & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base); + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base); + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + } /* while (ip < ilimit) */ + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); + } +} + + +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const U32 mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState); + case 5 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState); + case 6 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState); + case 7 : + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_compressBlock_doubleFast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls /* template */) +{ + ZSTD_compressionParameters const* cParams = &ms->cParams; + U32* const hashLong = ms->hashTable; + U32 const hBitsL = cParams->hashLog; + U32* const hashSmall = ms->chainTable; + U32 const hBitsS = cParams->chainLog; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); + + /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); + const U32 matchIndex = hashSmall[hSmall]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + + const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8); + const U32 matchLongIndex = hashLong[hLong]; + const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base; + const BYTE* matchLong = matchLongBase + matchLongIndex; + + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */ + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + size_t mLength; + hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */ + + if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ + & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else { + if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { + const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart; + U32 offset; + mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8; + offset = curr - matchLongIndex; + while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { + size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); + U32 const matchIndex3 = hashLong[h3]; + const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base; + const BYTE* match3 = match3Base + matchIndex3; + U32 offset; + hashLong[h3] = curr + 1; + if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) { + const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8; + ip++; + offset = curr+1 - matchIndex3; + while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */ + } else { + const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + offset = curr - matchIndex; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + } + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + + } else { + ip += ((ip-anchor) >> kSearchStrength) + 1; + continue; + } } + + /* move to next sequence start */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Complementary insertion */ + /* done after iLimit test, as candidates could be > iend-8 */ + { U32 const indexToInsert = curr+2; + hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; + hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); + hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; + hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); + } + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ + & (repIndex2 > dictStartIndex)) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2; + hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/lib/zstd/compress/zstd_double_fast.h b/lib/zstd/compress/zstd_double_fast.h new file mode 100644 index 000000000000..6822bde65a1d --- /dev/null +++ b/lib/zstd/compress/zstd_double_fast.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_DOUBLE_FAST_H +#define ZSTD_DOUBLE_FAST_H + + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */ + +void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_doubleFast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_doubleFast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + + +#endif /* ZSTD_DOUBLE_FAST_H */ diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c new file mode 100644 index 000000000000..96b7d48e2868 --- /dev/null +++ b/lib/zstd/compress/zstd_fast.c @@ -0,0 +1,496 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */ +#include "zstd_fast.h" + + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + const void* const end, + ZSTD_dictTableLoadMethod_e dtlm) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hBits = cParams->hashLog; + U32 const mls = cParams->minMatch; + const BYTE* const base = ms->window.base; + const BYTE* ip = base + ms->nextToUpdate; + const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE; + const U32 fastHashFillStep = 3; + + /* Always insert every fastHashFillStep position into the hash table. + * Insert the other positions if their hash entry is empty. + */ + for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) { + U32 const curr = (U32)(ip - base); + size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls); + hashTable[hash0] = curr; + if (dtlm == ZSTD_dtlm_fast) continue; + /* Only load extra positions for ZSTD_dtlm_full */ + { U32 p; + for (p = 1; p < fastHashFillStep; ++p) { + size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls); + if (hashTable[hash] == 0) { /* not yet filled */ + hashTable[hash] = curr + p; + } } } } +} + + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_fast_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, + U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ + const BYTE* ip0 = istart; + const BYTE* ip1; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog); + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); + ip0 += (ip0 == prefixStart); + ip1 = ip0 + 1; + { U32 const curr = (U32)(ip0 - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; + if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; + } + + /* Main Search Loop */ +#ifdef __INTEL_COMPILER + /* From intel 'The vector pragma indicates that the loop should be + * vectorized if it is legal to do so'. Can be used together with + * #pragma ivdep (but have opted to exclude that because intel + * warns against using it).*/ + #pragma vector always +#endif + while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ + size_t mLength; + BYTE const* ip2 = ip0 + 2; + size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); + U32 const val0 = MEM_read32(ip0); + size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); + U32 const val1 = MEM_read32(ip1); + U32 const current0 = (U32)(ip0-base); + U32 const current1 = (U32)(ip1-base); + U32 const matchIndex0 = hashTable[h0]; + U32 const matchIndex1 = hashTable[h1]; + BYTE const* repMatch = ip2 - offset_1; + const BYTE* match0 = base + matchIndex0; + const BYTE* match1 = base + matchIndex1; + U32 offcode; + +#if defined(__aarch64__) + PREFETCH_L1(ip0+256); +#endif + + hashTable[h0] = current0; /* update hash table */ + hashTable[h1] = current1; /* update hash table */ + + assert(ip0 + 1 == ip1); + + if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { + mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0; + ip0 = ip2 - mLength; + match0 = repMatch - mLength; + mLength += 4; + offcode = 0; + goto _match; + } + if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { + /* found a regular match */ + goto _offset; + } + if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { + /* found a regular match after one literal */ + ip0 = ip1; + match0 = match1; + goto _offset; + } + { size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize; + assert(step >= 2); + ip0 += step; + ip1 += step; + continue; + } +_offset: /* Requires: ip0, match0 */ + /* Compute the offset code */ + offset_2 = offset_1; + offset_1 = (U32)(ip0-match0); + offcode = offset_1 + ZSTD_REP_MOVE; + mLength = 4; + /* Count the backwards match length */ + while (((ip0>anchor) & (match0>prefixStart)) + && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ + +_match: /* Requires: ip0, match0, offcode */ + /* Count the forward length */ + mLength += ZSTD_count(ip0+mLength, match0+mLength, iend); + ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH); + /* match found */ + ip0 += mLength; + anchor = ip0; + + if (ip0 <= ilimit) { + /* Fill Table */ + assert(base+current0+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); + + if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */ + while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) { + /* store sequence */ + size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; + { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */ + hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); + ip0 += rLength; + ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH); + anchor = ip0; + continue; /* faster when present (confirmed on gcc-8) ... (?) */ + } } } + ip1 = ip0 + 1; + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState == NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_fast_dictMatchState_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 prefixStartIndex = ms->window.dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - HASH_READ_SIZE; + U32 offset_1=rep[0], offset_2=rep[1]; + U32 offsetSaved = 0; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; + const U32* const dictHashTable = dms->hashTable; + const U32 dictStartIndex = dms->window.dictLimit; + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictStart = dictBase + dictStartIndex; + const BYTE* const dictEnd = dms->window.nextSrc; + const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); + const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); + const U32 dictHLog = dictCParams->hashLog; + + /* if a dictionary is still attached, it necessarily means that + * it is within window size. So we just check it. */ + const U32 maxDistance = 1U << cParams->windowLog; + const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); + assert(endIndex - prefixStartIndex <= maxDistance); + (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ + + /* ensure there will be no underflow + * when translating a dict index into a local index */ + assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); + ip += (dictAndPrefixLength == 0); + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + + /* Main Search Loop */ + while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ + size_t mLength; + size_t const h = ZSTD_hashPtr(ip, hlog, mls); + U32 const curr = (U32)(ip-base); + U32 const matchIndex = hashTable[h]; + const BYTE* match = base + matchIndex; + const U32 repIndex = curr + 1 - offset_1; + const BYTE* repMatch = (repIndex < prefixStartIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + hashTable[h] = curr; /* update hash table */ + + if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH); + } else if ( (matchIndex <= prefixStartIndex) ) { + size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls); + U32 const dictMatchIndex = dictHashTable[dictHash]; + const BYTE* dictMatch = dictBase + dictMatchIndex; + if (dictMatchIndex <= dictStartIndex || + MEM_read32(dictMatch) != MEM_read32(ip)) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a dict match */ + U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta); + mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4; + while (((ip>anchor) & (dictMatch>dictStart)) + && (ip[-1] == dictMatch[-1])) { + ip--; dictMatch--; mLength++; + } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + } else if (MEM_read32(match) != MEM_read32(ip)) { + /* it's not a match, and we're not going to check the dictionary */ + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } else { + /* found a regular match */ + U32 const offset = (U32)(ip-match); + mLength = ZSTD_count(ip+4, match+4, iend) + 4; + while (((ip>anchor) & (match>prefixStart)) + && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; + offset_1 = offset; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + } + + /* match found */ + ip += mLength; + anchor = ip; + + if (ip <= ilimit) { + /* Fill Table */ + assert(base+curr+2 > istart); /* check base overflow */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; /* here because curr+2 could be > iend-8 */ + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? + dictBase - dictIndexDelta + repIndex2 : + base + repIndex2; + if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } + } + } + + /* save reps for next block */ + rep[0] = offset_1 ? offset_1 : offsetSaved; + rep[1] = offset_2 ? offset_2 : offsetSaved; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + assert(ms->dictMatchState != NULL); + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); + } +} + + +static size_t ZSTD_compressBlock_fast_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize, U32 const mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hlog = cParams->hashLog; + /* support stepSize of 0 */ + U32 const stepSize = cParams->targetLength + !(cParams->targetLength); + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); + const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); + const U32 dictStartIndex = lowLimit; + const BYTE* const dictStart = dictBase + dictStartIndex; + const U32 dictLimit = ms->window.dictLimit; + const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; + const BYTE* const prefixStart = base + prefixStartIndex; + const BYTE* const dictEnd = dictBase + prefixStartIndex; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + U32 offset_1=rep[0], offset_2=rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1); + + /* switch to "regular" variant if extDict is invalidated due to maxDistance */ + if (prefixStartIndex == dictStartIndex) + return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); + + /* Search Loop */ + while (ip < ilimit) { /* < instead of <=, because (ip+1) */ + const size_t h = ZSTD_hashPtr(ip, hlog, mls); + const U32 matchIndex = hashTable[h]; + const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base; + const BYTE* match = matchBase + matchIndex; + const U32 curr = (U32)(ip-base); + const U32 repIndex = curr + 1 - offset_1; + const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + hashTable[h] = curr; /* update hash table */ + DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr); + assert(offset_1 <= curr +1); /* check repIndex */ + + if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; + size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; + ip++; + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH); + ip += rLength; + anchor = ip; + } else { + if ( (matchIndex < dictStartIndex) || + (MEM_read32(match) != MEM_read32(ip)) ) { + assert(stepSize >= 1); + ip += ((ip-anchor) >> kSearchStrength) + stepSize; + continue; + } + { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; + const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; + U32 const offset = curr - matchIndex; + size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; + while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = offset; /* update offset history */ + ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH); + ip += mLength; + anchor = ip; + } } + + if (ip <= ilimit) { + /* Fill Table */ + hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2; + hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); + /* check immediate repcode */ + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex2 = current2 - offset_2; + const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; + size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; + { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; } /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH); + hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; + ip += repLength2; + anchor = ip; + continue; + } + break; + } } } + + /* save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + U32 const mls = ms->cParams.minMatch; + switch(mls) + { + default: /* includes case 3 */ + case 4 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4); + case 5 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5); + case 6 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6); + case 7 : + return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7); + } +} diff --git a/lib/zstd/compress/zstd_fast.h b/lib/zstd/compress/zstd_fast.h new file mode 100644 index 000000000000..fddc2f532d21 --- /dev/null +++ b/lib/zstd/compress/zstd_fast.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_FAST_H +#define ZSTD_FAST_H + + +#include "../common/mem.h" /* U32 */ +#include "zstd_compress_internal.h" + +void ZSTD_fillHashTable(ZSTD_matchState_t* ms, + void const* end, ZSTD_dictTableLoadMethod_e dtlm); +size_t ZSTD_compressBlock_fast( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_fast_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#endif /* ZSTD_FAST_H */ diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c new file mode 100644 index 000000000000..39aa2569aabc --- /dev/null +++ b/lib/zstd/compress/zstd_lazy.c @@ -0,0 +1,1412 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "zstd_lazy.h" + + +/*-************************************* +* Binary Tree search +***************************************/ + +static void +ZSTD_updateDUBT(ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iend, + U32 mls) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + if (idx != target) + DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)", + idx, target, ms->window.dictLimit); + assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */ + (void)iend; + + assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */ + for ( ; idx < target ; idx++) { + size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */ + U32 const matchIndex = hashTable[h]; + + U32* const nextCandidatePtr = bt + 2*(idx&btMask); + U32* const sortMarkPtr = nextCandidatePtr + 1; + + DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx); + hashTable[h] = idx; /* Update Hash Table */ + *nextCandidatePtr = matchIndex; /* update BT like a chain */ + *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK; + } + ms->nextToUpdate = target; +} + + +/** ZSTD_insertDUBT1() : + * sort one already inserted but unsorted position + * assumption : curr >= btlow == (curr - btmask) + * doesn't fail */ +static void +ZSTD_insertDUBT1(ZSTD_matchState_t* ms, + U32 curr, const BYTE* inputEnd, + U32 nbCompares, U32 btLow, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr; + const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ + U32 dummy32; /* to be nullified at the end */ + U32 const windowValid = ms->window.lowLimit; + U32 const maxDistance = 1U << cParams->windowLog; + U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid; + + + DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", + curr, dictLimit, windowLow); + assert(curr >= btLow); + assert(ip < iend); /* condition for ZSTD_count */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + /* note : all candidates are now supposed sorted, + * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK + * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */ + + if ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit) /* both in current segment*/ + || (curr < dictLimit) /* both in extDict */) { + const BYTE* const mBase = ( (dictMode != ZSTD_extDict) + || (matchIndex+matchLength >= dictLimit)) ? + base : dictBase; + assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */ + || (curr < dictLimit) ); + match = mBase + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* preparation for next read of match[matchLength] */ + } + + DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ", + curr, matchIndex, (U32)matchLength); + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u", + matchIndex, btLow, nextPtr[1]); + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u", + matchIndex, btLow, nextPtr[0]); + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; +} + + +static size_t +ZSTD_DUBT_findBetterDictMatch ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + size_t bestLength, + U32 nbCompares, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_matchState_t * const dms = ms->dictMatchState; + const ZSTD_compressionParameters* const dmsCParams = &dms->cParams; + const U32 * const dictHashTable = dms->hashTable; + U32 const hashLog = dmsCParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 dictMatchIndex = dictHashTable[h]; + + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + U32 const curr = (U32)(ip-base); + const BYTE* const dictBase = dms->window.base; + const BYTE* const dictEnd = dms->window.nextSrc; + U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base); + U32 const dictLowLimit = dms->window.lowLimit; + U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit; + + U32* const dictBt = dms->chainTable; + U32 const btLog = dmsCParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask; + + size_t commonLengthSmaller=0, commonLengthLarger=0; + + (void)dictMode; + assert(dictMode == ZSTD_dictMatchState); + + while (nbCompares-- && (dictMatchIndex > dictLowLimit)) { + U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dictBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (dictMatchIndex+matchLength >= dictHighLimit) + match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + U32 matchIndex = dictMatchIndex + dictIndexDelta; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) { + DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)", + curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex); + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + } + if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */ + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + +} + + +static size_t +ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + size_t* offsetPtr, + U32 const mls, + const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const unsortLimit = MAX(btLow, windowLow); + + U32* nextCandidate = bt + 2*(matchIndex&btMask); + U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1; + U32 nbCompares = 1U << cParams->searchLog; + U32 nbCandidates = nbCompares; + U32 previousCandidate = 0; + + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr); + assert(ip <= iend-8); /* required for h calculation */ + assert(dictMode != ZSTD_dedicatedDictSearch); + + /* reach end of unsorted candidates list */ + while ( (matchIndex > unsortLimit) + && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK) + && (nbCandidates > 1) ) { + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted", + matchIndex); + *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */ + previousCandidate = matchIndex; + matchIndex = *nextCandidate; + nextCandidate = bt + 2*(matchIndex&btMask); + unsortedMark = bt + 2*(matchIndex&btMask) + 1; + nbCandidates --; + } + + /* nullify last candidate if it's still unsorted + * simplification, detrimental to compression ratio, beneficial for speed */ + if ( (matchIndex > unsortLimit) + && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) { + DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u", + matchIndex); + *nextCandidate = *unsortedMark = 0; + } + + /* batch sort stacked candidates */ + matchIndex = previousCandidate; + while (matchIndex) { /* will end on matchIndex == 0 */ + U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1; + U32 const nextCandidateIdx = *nextCandidateIdxPtr; + ZSTD_insertDUBT1(ms, matchIndex, iend, + nbCandidates, unsortLimit, dictMode); + matchIndex = nextCandidateIdx; + nbCandidates++; + } + + /* find longest match */ + { size_t commonLengthSmaller = 0, commonLengthLarger = 0; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr + 8 + 1; + U32 dummy32; /* to be nullified at the end */ + size_t bestLength = 0; + + matchIndex = hashTable[h]; + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex > windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match; + + if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) { + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) + bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex; + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + if (dictMode == ZSTD_dictMatchState) { + nbCompares = 0; /* in addition to avoiding checking any + * further in this loop, make sure we + * skip checking in the dictionary. */ + } + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + bestLength = ZSTD_DUBT_findBetterDictMatch( + ms, ip, iend, + offsetPtr, bestLength, nbCompares, + mls, dictMode); + } + + assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */ + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + if (bestLength >= MINMATCH) { + U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex; + DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)", + curr, (U32)bestLength, (U32)*offsetPtr, mIndex); + } + return bestLength; + } +} + + +/** ZSTD_BtFindBestMatch() : Tree updater, providing best match */ +FORCE_INLINE_TEMPLATE size_t +ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls /* template */, + const ZSTD_dictMode_e dictMode) +{ + DEBUGLOG(7, "ZSTD_BtFindBestMatch"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateDUBT(ms, ip, iLimit, mls); + return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode); +} + + +static size_t +ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_BtFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + + +/* ********************************* +* Hash Chain +***********************************/ +#define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)] + +/* Update chains up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal( + ZSTD_matchState_t* ms, + const ZSTD_compressionParameters* const cParams, + const BYTE* ip, U32 const mls) +{ + U32* const hashTable = ms->hashTable; + const U32 hashLog = cParams->hashLog; + U32* const chainTable = ms->chainTable; + const U32 chainMask = (1 << cParams->chainLog) - 1; + const BYTE* const base = ms->window.base; + const U32 target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + + while(idx < target) { /* catch up */ + size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls); + NEXT_IN_CHAIN(idx, chainMask) = hashTable[h]; + hashTable[h] = idx; + idx++; + } + + ms->nextToUpdate = target; + return hashTable[ZSTD_hashPtr(ip, hashLog, mls)]; +} + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) { + const ZSTD_compressionParameters* const cParams = &ms->cParams; + return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch); +} + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32* const hashTable = ms->hashTable; + U32* const chainTable = ms->chainTable; + U32 const chainSize = 1 << ms->cParams.chainLog; + U32 idx = ms->nextToUpdate; + U32 const minChain = chainSize < target ? target - chainSize : idx; + U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const cacheSize = bucketSize - 1; + U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize; + U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts; + + /* We know the hashtable is oversized by a factor of `bucketSize`. + * We are going to temporarily pretend `bucketSize == 1`, keeping only a + * single entry. We will use the rest of the space to construct a temporary + * chaintable. + */ + U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG; + U32* const tmpHashTable = hashTable; + U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog); + U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog; + U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx; + + U32 hashIdx; + + assert(ms->cParams.chainLog <= 24); + assert(ms->cParams.hashLog >= ms->cParams.chainLog); + assert(idx != 0); + assert(tmpMinChain <= minChain); + + /* fill conventional hash table and conventional chain table */ + for ( ; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch); + if (idx >= tmpMinChain) { + tmpChainTable[idx - tmpMinChain] = hashTable[h]; + } + tmpHashTable[h] = idx; + } + + /* sort chains into ddss chain table */ + { + U32 chainPos = 0; + for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) { + U32 count; + U32 countBeyondMinChain = 0; + U32 i = tmpHashTable[hashIdx]; + for (count = 0; i >= tmpMinChain && count < cacheSize; count++) { + /* skip through the chain to the first position that won't be + * in the hash cache bucket */ + if (i < minChain) { + countBeyondMinChain++; + } + i = tmpChainTable[i - tmpMinChain]; + } + if (count == cacheSize) { + for (count = 0; count < chainLimit;) { + if (i < minChain) { + if (!i || countBeyondMinChain++ > cacheSize) { + /* only allow pulling `cacheSize` number of entries + * into the cache or chainTable beyond `minChain`, + * to replace the entries pulled out of the + * chainTable into the cache. This lets us reach + * back further without increasing the total number + * of entries in the chainTable, guaranteeing the + * DDSS chain table will fit into the space + * allocated for the regular one. */ + break; + } + } + chainTable[chainPos++] = i; + count++; + if (i < tmpMinChain) { + break; + } + i = tmpChainTable[i - tmpMinChain]; + } + } else { + count = 0; + } + if (count) { + tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count; + } else { + tmpHashTable[hashIdx] = 0; + } + } + assert(chainPos <= chainSize); /* I believe this is guaranteed... */ + } + + /* move chain pointers into the last entry of each hash bucket */ + for (hashIdx = (1 << hashLog); hashIdx; ) { + U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 const chainPackedPointer = tmpHashTable[hashIdx]; + U32 i; + for (i = 0; i < cacheSize; i++) { + hashTable[bucketIdx + i] = 0; + } + hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer; + } + + /* fill the buckets of the hash table */ + for (idx = ms->nextToUpdate; idx < target; idx++) { + U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch) + << ZSTD_LAZY_DDSS_BUCKET_LOG; + U32 i; + /* Shift hash cache down 1. */ + for (i = cacheSize - 1; i; i--) + hashTable[h + i] = hashTable[h + i - 1]; + hashTable[h] = idx; + } + + ms->nextToUpdate = target; +} + + +/* inlining is important to hardwire a hot branch (template emulation) */ +FORCE_INLINE_TEMPLATE +size_t ZSTD_HcFindBestMatch_generic ( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iLimit, + size_t* offsetPtr, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const chainTable = ms->chainTable; + const U32 chainSize = (1 << cParams->chainLog); + const U32 chainMask = chainSize-1; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const U32 curr = (U32)(ip-base); + const U32 maxDistance = 1U << cParams->windowLog; + const U32 lowestValid = ms->window.lowLimit; + const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid; + const U32 isDictionary = (ms->loadedDictEnd != 0); + const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; + const U32 minChain = curr > chainSize ? curr - chainSize : 0; + U32 nbAttempts = 1U << cParams->searchLog; + size_t ml=4-1; + + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch + ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch + ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0; + + U32 matchIndex; + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32* entry = &dms->hashTable[ddsIdx]; + PREFETCH_L1(entry); + } + + /* HC4 match finder */ + matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls); + + for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) { + const BYTE* const match = base + matchIndex; + assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */ + if (match[ml] == ip[ml]) /* potentially better */ + currentMl = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex; + assert(match+4 <= dictEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= minChain) break; + matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask); + } + + if (dictMode == ZSTD_dedicatedDictSearch) { + const U32 ddsLowestIndex = dms->window.dictLimit; + const BYTE* const ddsBase = dms->window.base; + const BYTE* const ddsEnd = dms->window.nextSrc; + const U32 ddsSize = (U32)(ddsEnd - ddsBase); + const U32 ddsIndexDelta = dictLimit - ddsSize; + const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG); + const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1; + U32 ddsAttempt; + + for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) { + PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]); + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 const chainIndex = chainPackedPointer >> 8; + + PREFETCH_L1(&dms->chainTable[chainIndex]); + } + + for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->hashTable[ddsIdx + ddsAttempt]; + match = ddsBase + matchIndex; + + if (!matchIndex) { + return ml; + } + + /* guaranteed by table construction */ + (void)ddsLowestIndex; + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) { + /* best possible, avoids read overflow on next attempt */ + return ml; + } + } + } + + { + U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1]; + U32 chainIndex = chainPackedPointer >> 8; + U32 const chainLength = chainPackedPointer & 0xFF; + U32 const chainAttempts = nbAttempts - ddsAttempt; + U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts; + U32 chainAttempt; + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) { + PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]); + } + + for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) { + size_t currentMl=0; + const BYTE* match; + matchIndex = dms->chainTable[chainIndex]; + match = ddsBase + matchIndex; + + /* guaranteed by table construction */ + assert(matchIndex >= ddsLowestIndex); + assert(match+4 <= ddsEnd); + if (MEM_read32(match) == MEM_read32(ip)) { + /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4; + } + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + } + } + } else if (dictMode == ZSTD_dictMatchState) { + const U32* const dmsChainTable = dms->chainTable; + const U32 dmsChainSize = (1 << dms->cParams.chainLog); + const U32 dmsChainMask = dmsChainSize - 1; + const U32 dmsLowestIndex = dms->window.dictLimit; + const BYTE* const dmsBase = dms->window.base; + const BYTE* const dmsEnd = dms->window.nextSrc; + const U32 dmsSize = (U32)(dmsEnd - dmsBase); + const U32 dmsIndexDelta = dictLimit - dmsSize; + const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0; + + matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)]; + + for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) { + size_t currentMl=0; + const BYTE* const match = dmsBase + matchIndex; + assert(match+4 <= dmsEnd); + if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */ + currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4; + + /* save best solution */ + if (currentMl > ml) { + ml = currentMl; + *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE; + if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */ + } + + if (matchIndex <= dmsMinChain) break; + + matchIndex = dmsChainTable[matchIndex & dmsChainMask]; + } + } + + return ml; +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict); + } +} + + +static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState); + } +} + + +static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch); + } +} + + +FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS ( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* const iLimit, + size_t* offsetPtr) +{ + switch(ms->cParams.minMatch) + { + default : /* includes case 3 */ + case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict); + case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict); + case 7 : + case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict); + } +} + + +/* ******************************* +* Common parser - lazy strategy +*********************************/ +typedef enum { search_hashChain, search_binaryTree } searchMethod_e; + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_lazy_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth, + ZSTD_dictMode_e const dictMode) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 prefixLowestIndex = ms->window.dictLimit; + const BYTE* const prefixLowest = base + prefixLowestIndex; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + + /** + * This table is indexed first by the four ZSTD_dictMode_e values, and then + * by the two searchMethod_e values. NULLs are placed for configurations + * that should never occur (extDict modes go to the other implementation + * below and there is no DDSS for binary tree search yet). + */ + const searchMax_f searchFuncs[4][2] = { + { + ZSTD_HcFindBestMatch_selectMLS, + ZSTD_BtFindBestMatch_selectMLS + }, + { + NULL, + NULL + }, + { + ZSTD_HcFindBestMatch_dictMatchState_selectMLS, + ZSTD_BtFindBestMatch_dictMatchState_selectMLS + }, + { + ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS, + NULL + } + }; + + searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree]; + U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; + + const int isDMS = dictMode == ZSTD_dictMatchState; + const int isDDS = dictMode == ZSTD_dedicatedDictSearch; + const int isDxS = isDMS || isDDS; + const ZSTD_matchState_t* const dms = ms->dictMatchState; + const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0; + const BYTE* const dictBase = isDxS ? dms->window.base : NULL; + const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL; + const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL; + const U32 dictIndexDelta = isDxS ? + prefixLowestIndex - (U32)(dictEnd - dictBase) : + 0; + const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest)); + + assert(searchMax != NULL); + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode); + + /* init */ + ip += (dictAndPrefixLength == 0); + if (dictMode == ZSTD_noDict) { + U32 const curr = (U32)(ip - base); + U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog); + U32 const maxRep = curr - windowLow; + if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; + if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0; + } + if (isDxS) { + /* dictMatchState repCode checks don't currently handle repCode == 0 + * disabling. */ + assert(offset_1 <= dictAndPrefixLength); + assert(offset_2 <= dictAndPrefixLength); + } + + /* Match Loop */ +#if defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + + /* check repCode */ + if (isDxS) { + const U32 repIndex = (U32)(ip - base) + 1 - offset_1; + const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch) + && repIndex < prefixLowestIndex) ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + if (depth==0) goto _storeSequence; + } + } + if ( dictMode == ZSTD_noDict + && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { + matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; + if (depth==0) goto _storeSequence; + } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) { + size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + if (isDxS) { + const U32 repIndex = (U32)(ip - base) - offset_1; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase + (repIndex - dictIndexDelta) : + base + repIndex; + if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; + size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; + int const gain2 = (int)(mlRep * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((mlRep >= 4) && (gain2 > gain1)) + matchLength = mlRep, offset = 0, start = ip; + } + } + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* NOTE: + * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior. + * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which + * overflows the pointer, which is undefined behavior. + */ + /* catch up */ + if (offset) { + if (dictMode == ZSTD_noDict) { + while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest)) + && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */ + { start--; matchLength++; } + } + if (isDxS) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex; + const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + } + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + if (isDxS) { + while (ip <= ilimit) { + U32 const current2 = (U32)(ip-base); + U32 const repIndex = current2 - offset_2; + const BYTE* repMatch = repIndex < prefixLowestIndex ? + dictBase - dictIndexDelta + repIndex : + base + repIndex; + if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */) + && (MEM_read32(repMatch) == MEM_read32(ip)) ) { + const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; + } + break; + } + } + + if (dictMode == ZSTD_noDict) { + while ( ((ip <= ilimit) & (offset_2>0)) + && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) { + /* store sequence */ + matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } } } + + /* Save reps for next block */ + rep[0] = offset_1 ? offset_1 : savedOffset; + rep[1] = offset_2 ? offset_2 : savedOffset; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); +} + + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch); +} + +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch); +} + + +FORCE_INLINE_TEMPLATE +size_t ZSTD_compressBlock_lazy_extDict_generic( + ZSTD_matchState_t* ms, seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const searchMethod_e searchMethod, const U32 depth) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* const dictBase = ms->window.dictBase; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const dictStart = dictBase + ms->window.lowLimit; + const U32 windowLog = ms->cParams.windowLog; + + typedef size_t (*searchMax_f)( + ZSTD_matchState_t* ms, + const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); + searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; + + U32 offset_1 = rep[0], offset_2 = rep[1]; + + DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic"); + + /* init */ + ip += (ip == prefixStart); + + /* Match Loop */ +#if defined(__x86_64__) + /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the + * code alignment is perturbed. To fix the instability align the loop on 32-bytes. + */ + __asm__(".p2align 5"); +#endif + while (ip < ilimit) { + size_t matchLength=0; + size_t offset=0; + const BYTE* start=ip+1; + U32 curr = (U32)(ip-base); + + /* check repCode */ + { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog); + const U32 repIndex = (U32)(curr+1 - offset_1); + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip+1) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4; + if (depth==0) goto _storeSequence; + } } + + /* first search (depth 0) */ + { size_t offsetFound = 999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offsetFound); + if (ml2 > matchLength) + matchLength = ml2, start = ip, offset=offsetFound; + } + + if (matchLength < 4) { + ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */ + continue; + } + + /* let's try to find a better solution */ + if (depth>=1) + while (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 3); + int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 1 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; /* search a better one */ + } } + + /* let's find an even better one */ + if ((depth==2) && (ip= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + int const gain2 = (int)(repLength * 4); + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1); + if ((repLength >= 4) && (gain2 > gain1)) + matchLength = repLength, offset = 0, start = ip; + } } + + /* search match, depth 2 */ + { size_t offset2=999999999; + size_t const ml2 = searchMax(ms, ip, iend, &offset2); + int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */ + int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7); + if ((ml2 >= 4) && (gain2 > gain1)) { + matchLength = ml2, offset = offset2, start = ip; + continue; + } } } + break; /* nothing found : store previous solution */ + } + + /* catch up */ + if (offset) { + U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE)); + const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex; + const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart; + while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */ + offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE); + } + + /* store sequence */ +_storeSequence: + { size_t const litLength = start - anchor; + ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH); + anchor = ip = start + matchLength; + } + + /* check immediate repcode */ + while (ip <= ilimit) { + const U32 repCurrent = (U32)(ip-base); + const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog); + const U32 repIndex = repCurrent - offset_2; + const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; + const BYTE* const repMatch = repBase + repIndex; + if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if (MEM_read32(ip) == MEM_read32(repMatch)) { + /* repcode detected we should take it */ + const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; + matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4; + offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */ + ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH); + ip += matchLength; + anchor = ip; + continue; /* faster when present ... (?) */ + } + break; + } } + + /* Save reps for next block */ + rep[0] = offset_1; + rep[1] = offset_2; + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); +} + +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); +} + +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); +} + +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) + +{ + return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); +} diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h new file mode 100644 index 000000000000..1fb7621e6a88 --- /dev/null +++ b/lib/zstd/compress/zstd_lazy.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LAZY_H +#define ZSTD_LAZY_H + + +#include "zstd_compress_internal.h" + +/** + * Dedicated Dictionary Search Structure bucket log. In the + * ZSTD_dedicatedDictSearch mode, the hashTable has + * 2 ** ZSTD_LAZY_DDSS_BUCKET_LOG entries in each bucket, rather than just + * one. + */ +#define ZSTD_LAZY_DDSS_BUCKET_LOG 2 + +U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); + +void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip); + +void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ + +size_t ZSTD_compressBlock_btlazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btlazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_greedy_dedicatedDictSearch( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_greedy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_lazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btlazy2_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +#endif /* ZSTD_LAZY_H */ diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c new file mode 100644 index 000000000000..4b786fe1cd0d --- /dev/null +++ b/lib/zstd/compress/zstd_ldm.c @@ -0,0 +1,686 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_ldm.h" + +#include "../common/debug.h" +#include +#include "zstd_fast.h" /* ZSTD_fillHashTable() */ +#include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */ +#include "zstd_ldm_geartab.h" + +#define LDM_BUCKET_SIZE_LOG 3 +#define LDM_MIN_MATCH_LENGTH 64 +#define LDM_HASH_RLOG 7 + +typedef struct { + U64 rolling; + U64 stopMask; +} ldmRollingHashState_t; + +/** ZSTD_ldm_gear_init(): + * + * Initializes the rolling hash state such that it will honor the + * settings in params. */ +static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const* params) +{ + unsigned maxBitsInMask = MIN(params->minMatchLength, 64); + unsigned hashRateLog = params->hashRateLog; + + state->rolling = ~(U32)0; + + /* The choice of the splitting criterion is subject to two conditions: + * 1. it has to trigger on average every 2^(hashRateLog) bytes; + * 2. ideally, it has to depend on a window of minMatchLength bytes. + * + * In the gear hash algorithm, bit n depends on the last n bytes; + * so in order to obtain a good quality splitting criterion it is + * preferable to use bits with high weight. + * + * To match condition 1 we use a mask with hashRateLog bits set + * and, because of the previous remark, we make sure these bits + * have the highest possible weight while still respecting + * condition 2. + */ + if (hashRateLog > 0 && hashRateLog <= maxBitsInMask) { + state->stopMask = (((U64)1 << hashRateLog) - 1) << (maxBitsInMask - hashRateLog); + } else { + /* In this degenerate case we simply honor the hash rate. */ + state->stopMask = ((U64)1 << hashRateLog) - 1; + } +} + +/** ZSTD_ldm_gear_feed(): + * + * Registers in the splits array all the split points found in the first + * size bytes following the data pointer. This function terminates when + * either all the data has been processed or LDM_BATCH_SIZE splits are + * present in the splits array. + * + * Precondition: The splits array must not be full. + * Returns: The number of bytes processed. */ +static size_t ZSTD_ldm_gear_feed(ldmRollingHashState_t* state, + BYTE const* data, size_t size, + size_t* splits, unsigned* numSplits) +{ + size_t n; + U64 hash, mask; + + hash = state->rolling; + mask = state->stopMask; + n = 0; + +#define GEAR_ITER_ONCE() do { \ + hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \ + n += 1; \ + if (UNLIKELY((hash & mask) == 0)) { \ + splits[*numSplits] = n; \ + *numSplits += 1; \ + if (*numSplits == LDM_BATCH_SIZE) \ + goto done; \ + } \ + } while (0) + + while (n + 3 < size) { + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + GEAR_ITER_ONCE(); + } + while (n < size) { + GEAR_ITER_ONCE(); + } + +#undef GEAR_ITER_ONCE + +done: + state->rolling = hash; + return n; +} + +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams) +{ + params->windowLog = cParams->windowLog; + ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX); + DEBUGLOG(4, "ZSTD_ldm_adjustParameters"); + if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG; + if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH; + if (params->hashLog == 0) { + params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG); + assert(params->hashLog <= ZSTD_HASHLOG_MAX); + } + if (params->hashRateLog == 0) { + params->hashRateLog = params->windowLog < params->hashLog + ? 0 + : params->windowLog - params->hashLog; + } + params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog); +} + +size_t ZSTD_ldm_getTableSize(ldmParams_t params) +{ + size_t const ldmHSize = ((size_t)1) << params.hashLog; + size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog); + size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog); + size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize) + + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t)); + return params.enableLdm ? totalSize : 0; +} + +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize) +{ + return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0; +} + +/** ZSTD_ldm_getBucket() : + * Returns a pointer to the start of the bucket associated with hash. */ +static ldmEntry_t* ZSTD_ldm_getBucket( + ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams) +{ + return ldmState->hashTable + (hash << ldmParams.bucketSizeLog); +} + +/** ZSTD_ldm_insertEntry() : + * Insert the entry with corresponding hash into the hash table */ +static void ZSTD_ldm_insertEntry(ldmState_t* ldmState, + size_t const hash, const ldmEntry_t entry, + ldmParams_t const ldmParams) +{ + BYTE* const pOffset = ldmState->bucketOffsets + hash; + unsigned const offset = *pOffset; + + *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + offset) = entry; + *pOffset = (BYTE)((offset + 1) & ((1u << ldmParams.bucketSizeLog) - 1)); + +} + +/** ZSTD_ldm_countBackwardsMatch() : + * Returns the number of bytes that match backwards before pIn and pMatch. + * + * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */ +static size_t ZSTD_ldm_countBackwardsMatch( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase) +{ + size_t matchLength = 0; + while (pIn > pAnchor && pMatch > pMatchBase && pIn[-1] == pMatch[-1]) { + pIn--; + pMatch--; + matchLength++; + } + return matchLength; +} + +/** ZSTD_ldm_countBackwardsMatch_2segments() : + * Returns the number of bytes that match backwards from pMatch, + * even with the backwards match spanning 2 different segments. + * + * On reaching `pMatchBase`, start counting from mEnd */ +static size_t ZSTD_ldm_countBackwardsMatch_2segments( + const BYTE* pIn, const BYTE* pAnchor, + const BYTE* pMatch, const BYTE* pMatchBase, + const BYTE* pExtDictStart, const BYTE* pExtDictEnd) +{ + size_t matchLength = ZSTD_ldm_countBackwardsMatch(pIn, pAnchor, pMatch, pMatchBase); + if (pMatch - matchLength != pMatchBase || pMatchBase == pExtDictStart) { + /* If backwards match is entirely in the extDict or prefix, immediately return */ + return matchLength; + } + DEBUGLOG(7, "ZSTD_ldm_countBackwardsMatch_2segments: found 2-parts backwards match (length in prefix==%zu)", matchLength); + matchLength += ZSTD_ldm_countBackwardsMatch(pIn - matchLength, pAnchor, pExtDictEnd, pExtDictStart); + DEBUGLOG(7, "final backwards match length = %zu", matchLength); + return matchLength; +} + +/** ZSTD_ldm_fillFastTables() : + * + * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies. + * This is similar to ZSTD_loadDictionaryContent. + * + * The tables for the other strategies are filled within their + * block compressors. */ +static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms, + void const* end) +{ + const BYTE* const iend = (const BYTE*)end; + + switch(ms->cParams.strategy) + { + case ZSTD_fast: + ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_dfast: + ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast); + break; + + case ZSTD_greedy: + case ZSTD_lazy: + case ZSTD_lazy2: + case ZSTD_btlazy2: + case ZSTD_btopt: + case ZSTD_btultra: + case ZSTD_btultra2: + break; + default: + assert(0); /* not possible : not a valid strategy id */ + } + + return 0; +} + +void ZSTD_ldm_fillHashTable( + ldmState_t* ldmState, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params) +{ + U32 const minMatchLength = params->minMatchLength; + U32 const hBits = params->hashLog - params->bucketSizeLog; + BYTE const* const base = ldmState->window.base; + BYTE const* const istart = ip; + ldmRollingHashState_t hashState; + size_t* const splits = ldmState->splitIndices; + unsigned numSplits; + + DEBUGLOG(5, "ZSTD_ldm_fillHashTable"); + + ZSTD_ldm_gear_init(&hashState, params); + while (ip < iend) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + if (ip + splits[n] >= istart + minMatchLength) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = xxh64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + ldmEntry_t entry; + + entry.offset = (U32)(split - base); + entry.checksum = (U32)(xxhash >> 32); + ZSTD_ldm_insertEntry(ldmState, hash, entry, *params); + } + } + + ip += hashed; + } +} + + +/** ZSTD_ldm_limitTableUpdate() : + * + * Sets cctx->nextToUpdate to a position corresponding closer to anchor + * if it is far way + * (after a long match, only update tables a limited amount). */ +static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) +{ + U32 const curr = (U32)(anchor - ms->window.base); + if (curr > ms->nextToUpdate + 1024) { + ms->nextToUpdate = + curr - MIN(512, curr - ms->nextToUpdate - 1024); + } +} + +static size_t ZSTD_ldm_generateSequences_internal( + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); + U32 const minMatchLength = params->minMatchLength; + U32 const entsPerBucket = 1U << params->bucketSizeLog; + U32 const hBits = params->hashLog - params->bucketSizeLog; + /* Prefix and extDict parameters */ + U32 const dictLimit = ldmState->window.dictLimit; + U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit; + BYTE const* const base = ldmState->window.base; + BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL; + BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL; + BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL; + BYTE const* const lowPrefixPtr = base + dictLimit; + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + BYTE const* const ilimit = iend - HASH_READ_SIZE; + /* Input positions */ + BYTE const* anchor = istart; + BYTE const* ip = istart; + /* Rolling hash state */ + ldmRollingHashState_t hashState; + /* Arrays for staged-processing */ + size_t* const splits = ldmState->splitIndices; + ldmMatchCandidate_t* const candidates = ldmState->matchCandidates; + unsigned numSplits; + + if (srcSize < minMatchLength) + return iend - anchor; + + /* Initialize the rolling hash state with the first minMatchLength bytes */ + ZSTD_ldm_gear_init(&hashState, params); + { + size_t n = 0; + + while (n < minMatchLength) { + numSplits = 0; + n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n, + splits, &numSplits); + } + ip += minMatchLength; + } + + while (ip < ilimit) { + size_t hashed; + unsigned n; + + numSplits = 0; + hashed = ZSTD_ldm_gear_feed(&hashState, ip, ilimit - ip, + splits, &numSplits); + + for (n = 0; n < numSplits; n++) { + BYTE const* const split = ip + splits[n] - minMatchLength; + U64 const xxhash = xxh64(split, minMatchLength, 0); + U32 const hash = (U32)(xxhash & (((U32)1 << hBits) - 1)); + + candidates[n].split = split; + candidates[n].hash = hash; + candidates[n].checksum = (U32)(xxhash >> 32); + candidates[n].bucket = ZSTD_ldm_getBucket(ldmState, hash, *params); + PREFETCH_L1(candidates[n].bucket); + } + + for (n = 0; n < numSplits; n++) { + size_t forwardMatchLength = 0, backwardMatchLength = 0, + bestMatchLength = 0, mLength; + BYTE const* const split = candidates[n].split; + U32 const checksum = candidates[n].checksum; + U32 const hash = candidates[n].hash; + ldmEntry_t* const bucket = candidates[n].bucket; + ldmEntry_t const* cur; + ldmEntry_t const* bestEntry = NULL; + ldmEntry_t newEntry; + + newEntry.offset = (U32)(split - base); + newEntry.checksum = checksum; + + /* If a split point would generate a sequence overlapping with + * the previous one, we merely register it in the hash table and + * move on */ + if (split < anchor) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + for (cur = bucket; cur < bucket + entsPerBucket; cur++) { + size_t curForwardMatchLength, curBackwardMatchLength, + curTotalMatchLength; + if (cur->checksum != checksum || cur->offset <= lowestIndex) { + continue; + } + if (extDict) { + BYTE const* const curMatchBase = + cur->offset < dictLimit ? dictBase : base; + BYTE const* const pMatch = curMatchBase + cur->offset; + BYTE const* const matchEnd = + cur->offset < dictLimit ? dictEnd : iend; + BYTE const* const lowMatchPtr = + cur->offset < dictLimit ? dictStart : lowPrefixPtr; + curForwardMatchLength = + ZSTD_count_2segments(split, pMatch, iend, matchEnd, lowPrefixPtr); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = ZSTD_ldm_countBackwardsMatch_2segments( + split, anchor, pMatch, lowMatchPtr, dictStart, dictEnd); + } else { /* !extDict */ + BYTE const* const pMatch = base + cur->offset; + curForwardMatchLength = ZSTD_count(split, pMatch, iend); + if (curForwardMatchLength < minMatchLength) { + continue; + } + curBackwardMatchLength = + ZSTD_ldm_countBackwardsMatch(split, anchor, pMatch, lowPrefixPtr); + } + curTotalMatchLength = curForwardMatchLength + curBackwardMatchLength; + + if (curTotalMatchLength > bestMatchLength) { + bestMatchLength = curTotalMatchLength; + forwardMatchLength = curForwardMatchLength; + backwardMatchLength = curBackwardMatchLength; + bestEntry = cur; + } + } + + /* No match found -- insert an entry into the hash table + * and process the next candidate match */ + if (bestEntry == NULL) { + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + continue; + } + + /* Match found */ + mLength = forwardMatchLength + backwardMatchLength; + { + U32 const offset = (U32)(split - base) - bestEntry->offset; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; + + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(split - backwardMatchLength - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; + } + + /* Insert the current entry into the hash table --- it must be + * done after the previous block to avoid clobbering bestEntry */ + ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params); + + anchor = split + forwardMatchLength; + } + + ip += hashed; + } + + return iend - anchor; +} + +/*! ZSTD_ldm_reduceTable() : + * reduce table indexes by `reducerValue` */ +static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, + U32 const reducerValue) +{ + U32 u; + for (u = 0; u < size; u++) { + if (table[u].offset < reducerValue) table[u].offset = 0; + else table[u].offset -= reducerValue; + } +} + +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) +{ + U32 const maxDist = 1U << params->windowLog; + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + size_t const kMaxChunkSize = 1 << 20; + size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); + size_t chunk; + size_t leftoverSize = 0; + + assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); + /* Check that ZSTD_window_update() has been called for this chunk prior + * to passing it to this function. + */ + assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { + BYTE const* const chunkStart = istart + chunk * kMaxChunkSize; + size_t const remaining = (size_t)(iend - chunkStart); + BYTE const *const chunkEnd = + (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize; + size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; + + assert(chunkStart < iend); + /* 1. Perform overflow correction if necessary. */ + if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { + U32 const ldmHSize = 1U << params->hashLog; + U32 const correction = ZSTD_window_correctOverflow( + &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); + ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); + /* invalidate dictionaries on overflow correction */ + ldmState->loadedDictEnd = 0; + } + /* 2. We enforce the maximum offset allowed. + * + * kMaxChunkSize should be small enough that we don't lose too much of + * the window through early invalidation. + * TODO: * Test the chunk size. + * * Try invalidation after the sequence generation and test the + * the offset against maxDist directly. + * + * NOTE: Because of dictionaries + sequence splitting we MUST make sure + * that any offset used is valid at the END of the sequence, since it may + * be split into two sequences. This condition holds when using + * ZSTD_window_enforceMaxDist(), but if we move to checking offsets + * against maxDist directly, we'll have to carefully handle that case. + */ + ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, chunkStart, chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } + } + return 0; +} + +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) { + while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) { + rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos; + if (srcSize <= seq->litLength) { + /* Skip past srcSize literals */ + seq->litLength -= (U32)srcSize; + return; + } + srcSize -= seq->litLength; + seq->litLength = 0; + if (srcSize < seq->matchLength) { + /* Skip past the first srcSize of the match */ + seq->matchLength -= (U32)srcSize; + if (seq->matchLength < minMatch) { + /* The match is too short, omit it */ + if (rawSeqStore->pos + 1 < rawSeqStore->size) { + seq[1].litLength += seq[0].matchLength; + } + rawSeqStore->pos++; + } + return; + } + srcSize -= seq->matchLength; + seq->matchLength = 0; + rawSeqStore->pos++; + } +} + +/** + * If the sequence length is longer than remaining then the sequence is split + * between this block and the next. + * + * Returns the current sequence to handle, or if the rest of the block should + * be literals, it returns a sequence with offset == 0. + */ +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) +{ + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; + assert(sequence.offset > 0); + /* Likely: No partial sequence */ + if (remaining >= sequence.litLength + sequence.matchLength) { + rawSeqStore->pos++; + return sequence; + } + /* Cut the sequence short (offset == 0 ==> rest is literals). */ + if (remaining <= sequence.litLength) { + sequence.offset = 0; + } else if (remaining < sequence.litLength + sequence.matchLength) { + sequence.matchLength = remaining - sequence.litLength; + if (sequence.matchLength < minMatch) { + sequence.offset = 0; + } + } + /* Skip past `remaining` bytes for the future sequences. */ + ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch); + return sequence; +} + +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + unsigned const minMatch = cParams->minMatch; + ZSTD_blockCompressor const blockCompressor = + ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms)); + /* Input bounds */ + BYTE const* const istart = (BYTE const*)src; + BYTE const* const iend = istart + srcSize; + /* Input positions */ + BYTE const* ip = istart; + + DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize); + /* If using opt parser, use LDMs only as candidates rather than always accepting them */ + if (cParams->strategy >= ZSTD_btopt) { + size_t lastLLSize; + ms->ldmSeqStore = rawSeqStore; + lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize); + ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore, srcSize); + return lastLLSize; + } + + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the literals */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ + if (sequence.offset == 0) + break; + + assert(ip + sequence.litLength + sequence.matchLength <= iend); + + /* Fill tables for block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Run the block compressor */ + DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength); + { + size_t const newLitLength = + blockCompressor(ms, seqStore, rep, ip, sequence.litLength); + ip += sequence.litLength; + /* Update the repcodes */ + for (i = ZSTD_REP_NUM - 1; i > 0; i--) + rep[i] = rep[i-1]; + rep[0] = sequence.offset; + /* Store the sequence */ + ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend, + sequence.offset + ZSTD_REP_MOVE, + sequence.matchLength - MINMATCH); + ip += sequence.matchLength; + } + } + /* Fill the tables for the block compressor */ + ZSTD_ldm_limitTableUpdate(ms, ip); + ZSTD_ldm_fillFastTables(ms, ip); + /* Compress the last literals */ + return blockCompressor(ms, seqStore, rep, ip, iend - ip); +} diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h new file mode 100644 index 000000000000..5ee467eaca2e --- /dev/null +++ b/lib/zstd/compress/zstd_ldm.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_H +#define ZSTD_LDM_H + + +#include "zstd_compress_internal.h" /* ldmParams_t, U32 */ +#include /* ZSTD_CCtx, size_t */ + +/*-************************************* +* Long distance matching +***************************************/ + +#define ZSTD_LDM_DEFAULT_WINDOW_LOG ZSTD_WINDOWLOG_LIMIT_DEFAULT + +void ZSTD_ldm_fillHashTable( + ldmState_t* state, const BYTE* ip, + const BYTE* iend, ldmParams_t const* params); + +/** + * ZSTD_ldm_generateSequences(): + * + * Generates the sequences using the long distance match finder. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. + * + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. + */ +size_t ZSTD_ldm_generateSequences( + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); + +/** + * ZSTD_ldm_blockCompress(): + * + * Compresses a block using the predefined sequences, along with a secondary + * block compressor. The literals section of every sequence is passed to the + * secondary block compressor, and those sequences are interspersed with the + * predefined sequences. Returns the length of the last literals. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. + * + * NOTE: The source must be at most the maximum block size, but the predefined + * sequences can be any size, and may be longer than the block. In the case that + * they are longer than the block, the last sequences may need to be split into + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. + */ +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +/** + * ZSTD_ldm_skipSequences(): + * + * Skip past `srcSize` bytes worth of sequences in `rawSeqStore`. + * Avoids emitting matches less than `minMatch` bytes. + * Must be called for data that is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, + U32 const minMatch); + +/* ZSTD_ldm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, updating fields 'pos' and 'posInSequence'. + * Not to be used in conjunction with ZSTD_ldm_skipSequences(). + * Must be called for data with is not passed to ZSTD_ldm_blockCompress(). + */ +void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes); + +/** ZSTD_ldm_getTableSize() : + * Estimate the space needed for long distance matching tables or 0 if LDM is + * disabled. + */ +size_t ZSTD_ldm_getTableSize(ldmParams_t params); + +/** ZSTD_ldm_getSeqSpace() : + * Return an upper bound on the number of sequences that can be produced by + * the long distance matcher, or 0 if LDM is disabled. + */ +size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize); + +/** ZSTD_ldm_adjustParameters() : + * If the params->hashRateLog is not set, set it to its default value based on + * windowLog and params->hashLog. + * + * Ensures that params->bucketSizeLog is <= params->hashLog (setting it to + * params->hashLog if it is not). + * + * Ensures that the minMatchLength >= targetLength during optimal parsing. + */ +void ZSTD_ldm_adjustParameters(ldmParams_t* params, + ZSTD_compressionParameters const* cParams); + + +#endif /* ZSTD_FAST_H */ diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h new file mode 100644 index 000000000000..e5c24d856b0a --- /dev/null +++ b/lib/zstd/compress/zstd_ldm_geartab.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_LDM_GEARTAB_H +#define ZSTD_LDM_GEARTAB_H + +static U64 ZSTD_ldm_gearTab[256] = { + 0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc, + 0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05, + 0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e, + 0x9c8528f65badeaca, 0x86563706e2097529, 0x2902475fa375d889, + 0xafb32a9739a5ebe6, 0xce2714da3883e639, 0x21eaf821722e69e, + 0x37b628620b628, 0x49a8d455d88caf5, 0x8556d711e6958140, + 0x4f7ae74fc605c1f, 0x829f0c3468bd3a20, 0x4ffdc885c625179e, + 0x8473de048a3daf1b, 0x51008822b05646b2, 0x69d75d12b2d1cc5f, + 0x8c9d4a19159154bc, 0xc3cc10f4abbd4003, 0xd06ddc1cecb97391, + 0xbe48e6e7ed80302e, 0x3481db31cee03547, 0xacc3f67cdaa1d210, + 0x65cb771d8c7f96cc, 0x8eb27177055723dd, 0xc789950d44cd94be, + 0x934feadc3700b12b, 0x5e485f11edbdf182, 0x1e2e2a46fd64767a, + 0x2969ca71d82efa7c, 0x9d46e9935ebbba2e, 0xe056b67e05e6822b, + 0x94d73f55739d03a0, 0xcd7010bdb69b5a03, 0x455ef9fcd79b82f4, + 0x869cb54a8749c161, 0x38d1a4fa6185d225, 0xb475166f94bbe9bb, + 0xa4143548720959f1, 0x7aed4780ba6b26ba, 0xd0ce264439e02312, + 0x84366d746078d508, 0xa8ce973c72ed17be, 0x21c323a29a430b01, + 0x9962d617e3af80ee, 0xab0ce91d9c8cf75b, 0x530e8ee6d19a4dbc, + 0x2ef68c0cf53f5d72, 0xc03a681640a85506, 0x496e4e9f9c310967, + 0x78580472b59b14a0, 0x273824c23b388577, 0x66bf923ad45cb553, + 0x47ae1a5a2492ba86, 0x35e304569e229659, 0x4765182a46870b6f, + 0x6cbab625e9099412, 0xddac9a2e598522c1, 0x7172086e666624f2, + 0xdf5003ca503b7837, 0x88c0c1db78563d09, 0x58d51865acfc289d, + 0x177671aec65224f1, 0xfb79d8a241e967d7, 0x2be1e101cad9a49a, + 0x6625682f6e29186b, 0x399553457ac06e50, 0x35dffb4c23abb74, + 0x429db2591f54aade, 0xc52802a8037d1009, 0x6acb27381f0b25f3, + 0xf45e2551ee4f823b, 0x8b0ea2d99580c2f7, 0x3bed519cbcb4e1e1, + 0xff452823dbb010a, 0x9d42ed614f3dd267, 0x5b9313c06257c57b, + 0xa114b8008b5e1442, 0xc1fe311c11c13d4b, 0x66e8763ea34c5568, + 0x8b982af1c262f05d, 0xee8876faaa75fbb7, 0x8a62a4d0d172bb2a, + 0xc13d94a3b7449a97, 0x6dbbba9dc15d037c, 0xc786101f1d92e0f1, + 0xd78681a907a0b79b, 0xf61aaf2962c9abb9, 0x2cfd16fcd3cb7ad9, + 0x868c5b6744624d21, 0x25e650899c74ddd7, 0xba042af4a7c37463, + 0x4eb1a539465a3eca, 0xbe09dbf03b05d5ca, 0x774e5a362b5472ba, + 0x47a1221229d183cd, 0x504b0ca18ef5a2df, 0xdffbdfbde2456eb9, + 0x46cd2b2fbee34634, 0xf2aef8fe819d98c3, 0x357f5276d4599d61, + 0x24a5483879c453e3, 0x88026889192b4b9, 0x28da96671782dbec, + 0x4ef37c40588e9aaa, 0x8837b90651bc9fb3, 0xc164f741d3f0e5d6, + 0xbc135a0a704b70ba, 0x69cd868f7622ada, 0xbc37ba89e0b9c0ab, + 0x47c14a01323552f6, 0x4f00794bacee98bb, 0x7107de7d637a69d5, + 0x88af793bb6f2255e, 0xf3c6466b8799b598, 0xc288c616aa7f3b59, + 0x81ca63cf42fca3fd, 0x88d85ace36a2674b, 0xd056bd3792389e7, + 0xe55c396c4e9dd32d, 0xbefb504571e6c0a6, 0x96ab32115e91e8cc, + 0xbf8acb18de8f38d1, 0x66dae58801672606, 0x833b6017872317fb, + 0xb87c16f2d1c92864, 0xdb766a74e58b669c, 0x89659f85c61417be, + 0xc8daad856011ea0c, 0x76a4b565b6fe7eae, 0xa469d085f6237312, + 0xaaf0365683a3e96c, 0x4dbb746f8424f7b8, 0x638755af4e4acc1, + 0x3d7807f5bde64486, 0x17be6d8f5bbb7639, 0x903f0cd44dc35dc, + 0x67b672eafdf1196c, 0xa676ff93ed4c82f1, 0x521d1004c5053d9d, + 0x37ba9ad09ccc9202, 0x84e54d297aacfb51, 0xa0b4b776a143445, + 0x820d471e20b348e, 0x1874383cb83d46dc, 0x97edeec7a1efe11c, + 0xb330e50b1bdc42aa, 0x1dd91955ce70e032, 0xa514cdb88f2939d5, + 0x2791233fd90db9d3, 0x7b670a4cc50f7a9b, 0x77c07d2a05c6dfa5, + 0xe3778b6646d0a6fa, 0xb39c8eda47b56749, 0x933ed448addbef28, + 0xaf846af6ab7d0bf4, 0xe5af208eb666e49, 0x5e6622f73534cd6a, + 0x297daeca42ef5b6e, 0x862daef3d35539a6, 0xe68722498f8e1ea9, + 0x981c53093dc0d572, 0xfa09b0bfbf86fbf5, 0x30b1e96166219f15, + 0x70e7d466bdc4fb83, 0x5a66736e35f2a8e9, 0xcddb59d2b7c1baef, + 0xd6c7d247d26d8996, 0xea4e39eac8de1ba3, 0x539c8bb19fa3aff2, + 0x9f90e4c5fd508d8, 0xa34e5956fbaf3385, 0x2e2f8e151d3ef375, + 0x173691e9b83faec1, 0xb85a8d56bf016379, 0x8382381267408ae3, + 0xb90f901bbdc0096d, 0x7c6ad32933bcec65, 0x76bb5e2f2c8ad595, + 0x390f851a6cf46d28, 0xc3e6064da1c2da72, 0xc52a0c101cfa5389, + 0xd78eaf84a3fbc530, 0x3781b9e2288b997e, 0x73c2f6dea83d05c4, + 0x4228e364c5b5ed7, 0x9d7a3edf0da43911, 0x8edcfeda24686756, + 0x5e7667a7b7a9b3a1, 0x4c4f389fa143791d, 0xb08bc1023da7cddc, + 0x7ab4be3ae529b1cc, 0x754e6132dbe74ff9, 0x71635442a839df45, + 0x2f6fb1643fbe52de, 0x961e0a42cf7a8177, 0xf3b45d83d89ef2ea, + 0xee3de4cf4a6e3e9b, 0xcd6848542c3295e7, 0xe4cee1664c78662f, + 0x9947548b474c68c4, 0x25d73777a5ed8b0b, 0xc915b1d636b7fc, + 0x21c2ba75d9b0d2da, 0x5f6b5dcf608a64a1, 0xdcf333255ff9570c, + 0x633b922418ced4ee, 0xc136dde0b004b34a, 0x58cc83b05d4b2f5a, + 0x5eb424dda28e42d2, 0x62df47369739cd98, 0xb4e0b42485e4ce17, + 0x16e1f0c1f9a8d1e7, 0x8ec3916707560ebf, 0x62ba6e2df2cc9db3, + 0xcbf9f4ff77d83a16, 0x78d9d7d07d2bbcc4, 0xef554ce1e02c41f4, + 0x8d7581127eccf94d, 0xa9b53336cb3c8a05, 0x38c42c0bf45c4f91, + 0x640893cdf4488863, 0x80ec34bc575ea568, 0x39f324f5b48eaa40, + 0xe9d9ed1f8eff527f, 0x9224fc058cc5a214, 0xbaba00b04cfe7741, + 0x309a9f120fcf52af, 0xa558f3ec65626212, 0x424bec8b7adabe2f, + 0x41622513a6aea433, 0xb88da2d5324ca798, 0xd287733b245528a4, + 0x9a44697e6d68aec3, 0x7b1093be2f49bb28, 0x50bbec632e3d8aad, + 0x6cd90723e1ea8283, 0x897b9e7431b02bf3, 0x219efdcb338a7047, + 0x3b0311f0a27c0656, 0xdb17bf91c0db96e7, 0x8cd4fd6b4e85a5b2, + 0xfab071054ba6409d, 0x40d6fe831fa9dfd9, 0xaf358debad7d791e, + 0xeb8d0e25a65e3e58, 0xbbcbd3df14e08580, 0xcf751f27ecdab2b, + 0x2b4da14f2613d8f4 +}; + +#endif /* ZSTD_LDM_GEARTAB_H */ diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c new file mode 100644 index 000000000000..402a7e5c76b2 --- /dev/null +++ b/lib/zstd/compress/zstd_opt.c @@ -0,0 +1,1345 @@ +/* + * Copyright (c) Przemyslaw Skibinski, Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include "zstd_compress_internal.h" +#include "hist.h" +#include "zstd_opt.h" + + +#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ +#define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ +#define ZSTD_MAX_PRICE (1<<30) + +#define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ + + +/*-************************************* +* Price functions for optimal parser +***************************************/ + +#if 0 /* approximation at bit level */ +# define BITCOST_ACCURACY 0 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat)) +#elif 0 /* fractional bit accuracy */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat)) +#else /* opt==approx, ultra==accurate */ +# define BITCOST_ACCURACY 8 +# define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY) +# define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat)) +#endif + +MEM_STATIC U32 ZSTD_bitWeight(U32 stat) +{ + return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER); +} + +MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat) +{ + U32 const stat = rawStat + 1; + U32 const hb = ZSTD_highbit32(stat); + U32 const BWeight = hb * BITCOST_MULTIPLIER; + U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb; + U32 const weight = BWeight + FWeight; + assert(hb + BITCOST_ACCURACY < 31); + return weight; +} + +#if (DEBUGLEVEL>=2) +/* debugging function, + * @return price in bytes as fractional value + * for debug messages only */ +MEM_STATIC double ZSTD_fCost(U32 price) +{ + return (double)price / (BITCOST_MULTIPLIER*8); +} +#endif + +static int ZSTD_compressedLiterals(optState_t const* const optPtr) +{ + return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; +} + +static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) +{ + if (ZSTD_compressedLiterals(optPtr)) + optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); + optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); + optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); + optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); +} + + +/* ZSTD_downscaleStat() : + * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) + * return the resulting sum of elements */ +static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) +{ + U32 s, sum=0; + DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); + assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); + for (s=0; s> (ZSTD_FREQ_DIV+malus)); + sum += table[s]; + } + return sum; +} + +/* ZSTD_rescaleFreqs() : + * if first block (detected by optPtr->litLengthSum == 0) : init statistics + * take hints from dictionary if there is one + * or init from zero, using src for literals stats, or flat 1 for match symbols + * otherwise downscale existing stats, to be used as seed for next block. + */ +static void +ZSTD_rescaleFreqs(optState_t* const optPtr, + const BYTE* const src, size_t const srcSize, + int const optLevel) +{ + int const compressedLiterals = ZSTD_compressedLiterals(optPtr); + DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); + optPtr->priceType = zop_dynamic; + + if (optPtr->litLengthSum == 0) { /* first block : init */ + if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ + DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); + optPtr->priceType = zop_predef; + } + + assert(optPtr->symbolCosts != NULL); + if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { + /* huffman table presumed generated by dictionary */ + optPtr->priceType = zop_dynamic; + + if (compressedLiterals) { + unsigned lit; + assert(optPtr->litFreq != NULL); + optPtr->litSum = 0; + for (lit=0; lit<=MaxLit; lit++) { + U32 const scaleLog = 11; /* scale to 2K */ + U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); + assert(bitCost <= scaleLog); + optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litSum += optPtr->litFreq[lit]; + } } + + { unsigned ll; + FSE_CState_t llstate; + FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable); + optPtr->litLengthSum = 0; + for (ll=0; ll<=MaxLL; ll++) { + U32 const scaleLog = 10; /* scale to 1K */ + U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll); + assert(bitCost < scaleLog); + optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->litLengthSum += optPtr->litLengthFreq[ll]; + } } + + { unsigned ml; + FSE_CState_t mlstate; + FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable); + optPtr->matchLengthSum = 0; + for (ml=0; ml<=MaxML; ml++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml); + assert(bitCost < scaleLog); + optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->matchLengthSum += optPtr->matchLengthFreq[ml]; + } } + + { unsigned of; + FSE_CState_t ofstate; + FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable); + optPtr->offCodeSum = 0; + for (of=0; of<=MaxOff; of++) { + U32 const scaleLog = 10; + U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of); + assert(bitCost < scaleLog); + optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/; + optPtr->offCodeSum += optPtr->offCodeFreq[of]; + } } + + } else { /* not a dictionary */ + + assert(optPtr->litFreq != NULL); + if (compressedLiterals) { + unsigned lit = MaxLit; + HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + } + + { unsigned ll; + for (ll=0; ll<=MaxLL; ll++) + optPtr->litLengthFreq[ll] = 1; + } + optPtr->litLengthSum = MaxLL+1; + + { unsigned ml; + for (ml=0; ml<=MaxML; ml++) + optPtr->matchLengthFreq[ml] = 1; + } + optPtr->matchLengthSum = MaxML+1; + + { unsigned of; + for (of=0; of<=MaxOff; of++) + optPtr->offCodeFreq[of] = 1; + } + optPtr->offCodeSum = MaxOff+1; + + } + + } else { /* new block : re-use previous statistics, scaled down */ + + if (compressedLiterals) + optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); + optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0); + } + + ZSTD_setBasePrices(optPtr, optLevel); +} + +/* ZSTD_rawLiteralsCost() : + * price of literals (only) in specified segment (which length can be 0). + * does not include price of literalLength symbol */ +static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength, + const optState_t* const optPtr, + int optLevel) +{ + if (litLength == 0) return 0; + + if (!ZSTD_compressedLiterals(optPtr)) + return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bytes per literal. */ + + if (optPtr->priceType == zop_predef) + return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ + + /* dynamic statistics */ + { U32 price = litLength * optPtr->litSumBasePrice; + U32 u; + for (u=0; u < litLength; u++) { + assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */ + price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel); + } + return price; + } +} + +/* ZSTD_litLengthPrice() : + * cost of literalLength symbol */ +static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel) +{ + if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); + + /* dynamic statistics */ + { U32 const llCode = ZSTD_LLcode(litLength); + return (LL_bits[llCode] * BITCOST_MULTIPLIER) + + optPtr->litLengthSumBasePrice + - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); + } +} + +/* ZSTD_getMatchPrice() : + * Provides the cost of the match part (offset + matchLength) of a sequence + * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. + * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ +FORCE_INLINE_TEMPLATE U32 +ZSTD_getMatchPrice(U32 const offset, + U32 const matchLength, + const optState_t* const optPtr, + int const optLevel) +{ + U32 price; + U32 const offCode = ZSTD_highbit32(offset+1); + U32 const mlBase = matchLength - MINMATCH; + assert(matchLength >= MINMATCH); + + if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */ + return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER); + + /* dynamic statistics */ + price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel)); + if ((optLevel<2) /*static*/ && offCode >= 20) + price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */ + + /* match Length */ + { U32 const mlCode = ZSTD_MLcode(mlBase); + price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel)); + } + + price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */ + + DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price); + return price; +} + +/* ZSTD_updateStats() : + * assumption : literals + litLengtn <= iend */ +static void ZSTD_updateStats(optState_t* const optPtr, + U32 litLength, const BYTE* literals, + U32 offsetCode, U32 matchLength) +{ + /* literals */ + if (ZSTD_compressedLiterals(optPtr)) { + U32 u; + for (u=0; u < litLength; u++) + optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; + optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; + } + + /* literal Length */ + { U32 const llCode = ZSTD_LLcode(litLength); + optPtr->litLengthFreq[llCode]++; + optPtr->litLengthSum++; + } + + /* match offset code (0-2=>repCode; 3+=>offset+2) */ + { U32 const offCode = ZSTD_highbit32(offsetCode+1); + assert(offCode <= MaxOff); + optPtr->offCodeFreq[offCode]++; + optPtr->offCodeSum++; + } + + /* match Length */ + { U32 const mlBase = matchLength - MINMATCH; + U32 const mlCode = ZSTD_MLcode(mlBase); + optPtr->matchLengthFreq[mlCode]++; + optPtr->matchLengthSum++; + } +} + + +/* ZSTD_readMINMATCH() : + * function safe only for comparisons + * assumption : memPtr must be at least 4 bytes before end of buffer */ +MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length) +{ + switch (length) + { + default : + case 4 : return MEM_read32(memPtr); + case 3 : if (MEM_isLittleEndian()) + return MEM_read32(memPtr)<<8; + else + return MEM_read32(memPtr)>>8; + } +} + + +/* Update hashTable3 up to ip (excluded) + Assumption : always within prefix (i.e. not within extDict) */ +static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip) +{ + U32* const hashTable3 = ms->hashTable3; + U32 const hashLog3 = ms->hashLog3; + const BYTE* const base = ms->window.base; + U32 idx = *nextToUpdate3; + U32 const target = (U32)(ip - base); + size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); + assert(hashLog3 > 0); + + while(idx < target) { + hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx; + idx++; + } + + *nextToUpdate3 = target; + return hashTable3[hash3]; +} + + +/*-************************************* +* Binary Tree search +***************************************/ +/** ZSTD_insertBt1() : add one or multiple positions to tree. + * ip : assumed <= iend-8 . + * @return : nb of positions added */ +static U32 ZSTD_insertBt1( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + U32 const mls, const int extDict) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32* const hashTable = ms->hashTable; + U32 const hashLog = cParams->hashLog; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask = (1 << btLog) - 1; + U32 matchIndex = hashTable[h]; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const base = ms->window.base; + const BYTE* const dictBase = ms->window.dictBase; + const U32 dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + const BYTE* match; + const U32 curr = (U32)(ip-base); + const U32 btLow = btMask >= curr ? 0 : curr - btMask; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = smallerPtr + 1; + U32 dummy32; /* to be nullified at the end */ + U32 const windowLow = ms->window.lowLimit; + U32 matchEndIdx = curr+8+1; + size_t bestLength = 8; + U32 nbCompares = 1U << cParams->searchLog; +#ifdef ZSTD_C_PREDICT + U32 predictedSmall = *(bt + 2*((curr-1)&btMask) + 0); + U32 predictedLarge = *(bt + 2*((curr-1)&btMask) + 1); + predictedSmall += (predictedSmall>0); + predictedLarge += (predictedLarge>0); +#endif /* ZSTD_C_PREDICT */ + + DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr); + + assert(ip <= iend-8); /* required for h calculation */ + hashTable[h] = curr; /* Update Hash Table */ + + assert(windowLow > 0); + while (nbCompares-- && (matchIndex >= windowLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(matchIndex < curr); + +#ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ + const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ + if (matchIndex == predictedSmall) { + /* no need to check length, result known */ + *smallerPtr = matchIndex; + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new "smaller" => larger of match */ + matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + predictedSmall = predictPtr[1] + (predictPtr[1]>0); + continue; + } + if (matchIndex == predictedLarge) { + *largerPtr = matchIndex; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + predictedLarge = predictPtr[0] + (predictPtr[0]>0); + continue; + } +#endif + + if (!extDict || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */ + match = base + matchIndex; + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend); + } else { + match = dictBase + matchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ + } + + if (matchLength > bestLength) { + bestLength = matchLength; + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + } + + if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */ + break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */ + } + + if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */ + /* match is smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */ + } else { + /* match is larger than current */ + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + { U32 positions = 0; + if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ + assert(matchEndIdx > curr + 8); + return MAX(positions, matchEndIdx - (curr + 8)); + } +} + +FORCE_INLINE_TEMPLATE +void ZSTD_updateTree_internal( + ZSTD_matchState_t* ms, + const BYTE* const ip, const BYTE* const iend, + const U32 mls, const ZSTD_dictMode_e dictMode) +{ + const BYTE* const base = ms->window.base; + U32 const target = (U32)(ip - base); + U32 idx = ms->nextToUpdate; + DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", + idx, target, dictMode); + + while(idx < target) { + U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); + assert(idx < (U32)(idx + forward)); + idx += forward; + } + assert((size_t)(ip - base) <= (size_t)(U32)(-1)); + assert((size_t)(iend - base) <= (size_t)(U32)(-1)); + ms->nextToUpdate = target; +} + +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { + ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); +} + +FORCE_INLINE_TEMPLATE +U32 ZSTD_insertBtAndGetAllMatches ( + ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ + const U32 lengthToBeat, + U32 const mls /* template */) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + const BYTE* const base = ms->window.base; + U32 const curr = (U32)(ip-base); + U32 const hashLog = cParams->hashLog; + U32 const minMatch = (mls==3) ? 3 : 4; + U32* const hashTable = ms->hashTable; + size_t const h = ZSTD_hashPtr(ip, hashLog, mls); + U32 matchIndex = hashTable[h]; + U32* const bt = ms->chainTable; + U32 const btLog = cParams->chainLog - 1; + U32 const btMask= (1U << btLog) - 1; + size_t commonLengthSmaller=0, commonLengthLarger=0; + const BYTE* const dictBase = ms->window.dictBase; + U32 const dictLimit = ms->window.dictLimit; + const BYTE* const dictEnd = dictBase + dictLimit; + const BYTE* const prefixStart = base + dictLimit; + U32 const btLow = (btMask >= curr) ? 0 : curr - btMask; + U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog); + U32 const matchLow = windowLow ? windowLow : 1; + U32* smallerPtr = bt + 2*(curr&btMask); + U32* largerPtr = bt + 2*(curr&btMask) + 1; + U32 matchEndIdx = curr+8+1; /* farthest referenced position of any match => detects repetitive patterns */ + U32 dummy32; /* to be nullified at the end */ + U32 mnum = 0; + U32 nbCompares = 1U << cParams->searchLog; + + const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL; + const ZSTD_compressionParameters* const dmsCParams = + dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL; + const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL; + const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL; + U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0; + U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0; + U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0; + U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog; + U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog; + U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0; + U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit; + + size_t bestLength = lengthToBeat-1; + DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", curr); + + /* check repCode */ + assert(ll0 <= 1); /* necessarily 1 or 0 */ + { U32 const lastR = ZSTD_REP_NUM + ll0; + U32 repCode; + for (repCode = ll0; repCode < lastR; repCode++) { + U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; + U32 const repIndex = curr - repOffset; + U32 repLen = 0; + assert(curr >= dictLimit); + if (repOffset-1 /* intentional overflow, discards 0 and -1 */ < curr-dictLimit) { /* equivalent to `curr > repIndex >= dictLimit` */ + /* We must validate the repcode offset because when we're using a dictionary the + * valid offset range shrinks when the dictionary goes out of bounds. + */ + if ((repIndex >= windowLow) & (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repOffset, minMatch))) { + repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch; + } + } else { /* repIndex < dictLimit || repIndex >= curr */ + const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ? + dmsBase + repIndex - dmsIndexDelta : + dictBase + repIndex; + assert(curr >= windowLow); + if ( dictMode == ZSTD_extDict + && ( ((repOffset-1) /*intentional overflow*/ < curr - windowLow) /* equivalent to `curr > repIndex >= windowLow` */ + & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */) + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch; + } + if (dictMode == ZSTD_dictMatchState + && ( ((repOffset-1) /*intentional overflow*/ < curr - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `curr > repIndex >= dmsLowLimit` */ + & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */ + && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) { + repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch; + } } + /* save longer solution */ + if (repLen > bestLength) { + DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u", + repCode, ll0, repOffset, repLen); + bestLength = repLen; + matches[mnum].off = repCode - ll0; + matches[mnum].len = (U32)repLen; + mnum++; + if ( (repLen > sufficient_len) + | (ip+repLen == iLimit) ) { /* best possible */ + return mnum; + } } } } + + /* HC3 match finder */ + if ((mls == 3) /*static*/ && (bestLength < mls)) { + U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); + if ((matchIndex3 >= matchLow) + & (curr - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { + size_t mlen; + if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) { + const BYTE* const match = base + matchIndex3; + mlen = ZSTD_count(ip, match, iLimit); + } else { + const BYTE* const match = dictBase + matchIndex3; + mlen = ZSTD_count_2segments(ip, match, iLimit, dictEnd, prefixStart); + } + + /* save best solution */ + if (mlen >= mls /* == 3 > bestLength */) { + DEBUGLOG(8, "found small match with hlog3, of length %u", + (U32)mlen); + bestLength = mlen; + assert(curr > matchIndex3); + assert(mnum==0); /* no prior solution */ + matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE; + matches[0].len = (U32)mlen; + mnum = 1; + if ( (mlen > sufficient_len) | + (ip+mlen == iLimit) ) { /* best possible length */ + ms->nextToUpdate = curr+1; /* skip insertion */ + return 1; + } } } + /* no dictMatchState lookup: dicts don't have a populated HC3 table */ + } + + hashTable[h] = curr; /* Update Hash Table */ + + while (nbCompares-- && (matchIndex >= matchLow)) { + U32* const nextPtr = bt + 2*(matchIndex & btMask); + const BYTE* match; + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + assert(curr > matchIndex); + + if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { + assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ + match = base + matchIndex; + if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); + } else { + match = dictBase + matchIndex; + assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); + if (matchIndex+matchLength >= dictLimit) + match = base + matchIndex; /* prepare for match[matchLength] read */ + } + + if (matchLength > bestLength) { + DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + assert(matchEndIdx > matchIndex); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */ + break; /* drop, to preserve bt consistency (miss a little bit of compression) */ + } + } + + if (match[matchLength] < ip[matchLength]) { + /* match smaller than current */ + *smallerPtr = matchIndex; /* update smaller idx */ + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + smallerPtr = nextPtr+1; /* new candidate => larger than match, which was smaller than current */ + matchIndex = nextPtr[1]; /* new matchIndex, larger than previous, closer to current */ + } else { + *largerPtr = matchIndex; + commonLengthLarger = matchLength; + if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */ + largerPtr = nextPtr; + matchIndex = nextPtr[0]; + } } + + *smallerPtr = *largerPtr = 0; + + if (dictMode == ZSTD_dictMatchState && nbCompares) { + size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls); + U32 dictMatchIndex = dms->hashTable[dmsH]; + const U32* const dmsBt = dms->chainTable; + commonLengthSmaller = commonLengthLarger = 0; + while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) { + const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask); + size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ + const BYTE* match = dmsBase + dictMatchIndex; + matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart); + if (dictMatchIndex+matchLength >= dmsHighLimit) + match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */ + + if (matchLength > bestLength) { + matchIndex = dictMatchIndex + dmsIndexDelta; + DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)", + (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE); + if (matchLength > matchEndIdx - matchIndex) + matchEndIdx = matchIndex + (U32)matchLength; + bestLength = matchLength; + matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE; + matches[mnum].len = (U32)matchLength; + mnum++; + if ( (matchLength > ZSTD_OPT_NUM) + | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) { + break; /* drop, to guarantee consistency (miss a little bit of compression) */ + } + } + + if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */ + if (match[matchLength] < ip[matchLength]) { + commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ + dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */ + } else { + /* match is larger than current */ + commonLengthLarger = matchLength; + dictMatchIndex = nextPtr[0]; + } + } + } + + assert(matchEndIdx > curr+8); + ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */ + return mnum; +} + + +FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( + ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ + ZSTD_matchState_t* ms, + U32* nextToUpdate3, + const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, + const U32 rep[ZSTD_REP_NUM], + U32 const ll0, + U32 const lengthToBeat) +{ + const ZSTD_compressionParameters* const cParams = &ms->cParams; + U32 const matchLengthSearch = cParams->minMatch; + DEBUGLOG(8, "ZSTD_BtGetAllMatches"); + if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ + ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); + switch(matchLengthSearch) + { + case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); + default : + case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); + case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); + case 7 : + case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); + } +} + +/************************* +* LDM helper functions * +*************************/ + +/* Struct containing info needed to make decision about ldm inclusion */ +typedef struct { + rawSeqStore_t seqStore; /* External match candidates store for this block */ + U32 startPosInBlock; /* Start position of the current match candidate */ + U32 endPosInBlock; /* End position of the current match candidate */ + U32 offset; /* Offset of the match candidate */ +} ZSTD_optLdm_t; + +/* ZSTD_optLdm_skipRawSeqStoreBytes(): + * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'. + */ +static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) { + U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes); + while (currPos && rawSeqStore->pos < rawSeqStore->size) { + rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos]; + if (currPos >= currSeq.litLength + currSeq.matchLength) { + currPos -= currSeq.litLength + currSeq.matchLength; + rawSeqStore->pos++; + } else { + rawSeqStore->posInSequence = currPos; + break; + } + } + if (currPos == 0 || rawSeqStore->pos == rawSeqStore->size) { + rawSeqStore->posInSequence = 0; + } +} + +/* ZSTD_opt_getNextMatchAndUpdateSeqStore(): + * Calculates the beginning and end of the next match in the current block. + * Updates 'pos' and 'posInSequence' of the ldmSeqStore. + */ +static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock, + U32 blockBytesRemaining) { + rawSeq currSeq; + U32 currBlockEndPos; + U32 literalsBytesRemaining; + U32 matchBytesRemaining; + + /* Setting match end position to MAX to ensure we never use an LDM during this block */ + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + return; + } + /* Calculate appropriate bytes left in matchLength and litLength after adjusting + based on ldmSeqStore->posInSequence */ + currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos]; + assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength); + currBlockEndPos = currPosInBlock + blockBytesRemaining; + literalsBytesRemaining = (optLdm->seqStore.posInSequence < currSeq.litLength) ? + currSeq.litLength - (U32)optLdm->seqStore.posInSequence : + 0; + matchBytesRemaining = (literalsBytesRemaining == 0) ? + currSeq.matchLength - ((U32)optLdm->seqStore.posInSequence - currSeq.litLength) : + currSeq.matchLength; + + /* If there are more literal bytes than bytes remaining in block, no ldm is possible */ + if (literalsBytesRemaining >= blockBytesRemaining) { + optLdm->startPosInBlock = UINT_MAX; + optLdm->endPosInBlock = UINT_MAX; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, blockBytesRemaining); + return; + } + + /* Matches may be < MINMATCH by this process. In that case, we will reject them + when we are deciding whether or not to add the ldm */ + optLdm->startPosInBlock = currPosInBlock + literalsBytesRemaining; + optLdm->endPosInBlock = optLdm->startPosInBlock + matchBytesRemaining; + optLdm->offset = currSeq.offset; + + if (optLdm->endPosInBlock > currBlockEndPos) { + /* Match ends after the block ends, we can't use the whole match */ + optLdm->endPosInBlock = currBlockEndPos; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, currBlockEndPos - currPosInBlock); + } else { + /* Consume nb of bytes equal to size of sequence left */ + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, literalsBytesRemaining + matchBytesRemaining); + } +} + +/* ZSTD_optLdm_maybeAddMatch(): + * Adds a match if it's long enough, based on it's 'matchStartPosInBlock' + * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches' + */ +static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches, + ZSTD_optLdm_t* optLdm, U32 currPosInBlock) { + U32 posDiff = currPosInBlock - optLdm->startPosInBlock; + /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */ + U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff; + U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE; + + /* Ensure that current block position is not outside of the match */ + if (currPosInBlock < optLdm->startPosInBlock + || currPosInBlock >= optLdm->endPosInBlock + || candidateMatchLength < MINMATCH) { + return; + } + + if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) { + DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u", + candidateOffCode, candidateMatchLength, currPosInBlock); + matches[*nbMatches].len = candidateMatchLength; + matches[*nbMatches].off = candidateOffCode; + (*nbMatches)++; + } +} + +/* ZSTD_optLdm_processMatchCandidate(): + * Wrapper function to update ldm seq store and call ldm functions as necessary. + */ +static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches, + U32 currPosInBlock, U32 remainingBytes) { + if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) { + return; + } + + if (currPosInBlock >= optLdm->endPosInBlock) { + if (currPosInBlock > optLdm->endPosInBlock) { + /* The position at which ZSTD_optLdm_processMatchCandidate() is called is not necessarily + * at the end of a match from the ldm seq store, and will often be some bytes + * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots" + */ + U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock; + ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot); + } + ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes); + } + ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock); +} + +/*-******************************* +* Optimal parser +*********************************/ + + +static U32 ZSTD_totalLen(ZSTD_optimal_t sol) +{ + return sol.litlen + sol.mlen; +} + +#if 0 /* debug */ + +static void +listStats(const U32* table, int lastEltID) +{ + int const nbElts = lastEltID + 1; + int enb; + for (enb=0; enb < nbElts; enb++) { + (void)table; + /* RAWLOG(2, "%3i:%3i, ", enb, table[enb]); */ + RAWLOG(2, "%4i,", table[enb]); + } + RAWLOG(2, " \n"); +} + +#endif + +FORCE_INLINE_TEMPLATE size_t +ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize, + const int optLevel, + const ZSTD_dictMode_e dictMode) +{ + optState_t* const optStatePtr = &ms->opt; + const BYTE* const istart = (const BYTE*)src; + const BYTE* ip = istart; + const BYTE* anchor = istart; + const BYTE* const iend = istart + srcSize; + const BYTE* const ilimit = iend - 8; + const BYTE* const base = ms->window.base; + const BYTE* const prefixStart = base + ms->window.dictLimit; + const ZSTD_compressionParameters* const cParams = &ms->cParams; + + U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); + U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; + U32 nextToUpdate3 = ms->nextToUpdate; + + ZSTD_optimal_t* const opt = optStatePtr->priceTable; + ZSTD_match_t* const matches = optStatePtr->matchTable; + ZSTD_optimal_t lastSequence; + ZSTD_optLdm_t optLdm; + + optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore; + optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0; + ZSTD_opt_getNextMatchAndUpdateSeqStore(&optLdm, (U32)(ip-istart), (U32)(iend-ip)); + + /* init */ + DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", + (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); + assert(optLevel <= 2); + ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); + ip += (ip==prefixStart); + + /* Match Loop */ + while (ip < ilimit) { + U32 cur, last_pos = 0; + + /* find first match */ + { U32 const litlen = (U32)(ip - anchor); + U32 const ll0 = !litlen; + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(ip-istart), (U32)(iend - ip)); + if (!nbMatches) { ip++; continue; } + + /* initialize opt[0] */ + { U32 i ; for (i=0; i immediate encoding */ + { U32 const maxML = matches[nbMatches-1].len; + U32 const maxOffset = matches[nbMatches-1].off; + DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", + nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); + + if (maxML > sufficient_len) { + lastSequence.litlen = litlen; + lastSequence.mlen = maxML; + lastSequence.off = maxOffset; + DEBUGLOG(6, "large match (%u>%u), immediate encoding", + maxML, sufficient_len); + cur = 0; + last_pos = ZSTD_totalLen(lastSequence); + goto _shortestPath; + } } + + /* set prices for first matches starting position == 0 */ + { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 pos; + U32 matchNb; + for (pos = 1; pos < minMatch; pos++) { + opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */ + } + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const end = matches[matchNb].len; + for ( ; pos <= end ; pos++ ) { + U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel); + U32 const sequencePrice = literalsPrice + matchPrice; + DEBUGLOG(7, "rPos:%u => set initial price : %.2f", + pos, ZSTD_fCost(sequencePrice)); + opt[pos].mlen = pos; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = sequencePrice; + } } + last_pos = pos-1; + } + } + + /* check further positions */ + for (cur = 1; cur <= last_pos; cur++) { + const BYTE* const inr = ip + cur; + assert(cur < ZSTD_OPT_NUM); + DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur) + + /* Fix current position with one literal if cheaper */ + { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1; + int const price = opt[cur-1].price + + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel) + + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel) + - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel); + assert(price < 1000000000); /* overflow check */ + if (price <= opt[cur].price) { + DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen, + opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]); + opt[cur].mlen = 0; + opt[cur].off = 0; + opt[cur].litlen = litlen; + opt[cur].price = price; + } else { + DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)", + inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), + opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]); + } + } + + /* Set the repcodes of the current position. We must do it here + * because we rely on the repcodes of the 2nd to last sequence being + * correct to set the next chunks repcodes during the backward + * traversal. + */ + ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t)); + assert(cur >= opt[cur].mlen); + if (opt[cur].mlen != 0) { + U32 const prev = cur - opt[cur].mlen; + repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0); + ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t)); + } else { + ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t)); + } + + /* last match must start at a minimum distance of 8 from oend */ + if (inr > ilimit) continue; + + if (cur == last_pos) break; + + if ( (optLevel==0) /*static_test*/ + && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) { + DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1); + continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */ + } + + { U32 const ll0 = (opt[cur].mlen != 0); + U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; + U32 const previousPrice = opt[cur].price; + U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); + U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); + U32 matchNb; + + ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches, + (U32)(inr-istart), (U32)(iend-inr)); + + if (!nbMatches) { + DEBUGLOG(7, "rPos:%u : no match found", cur); + continue; + } + + { U32 const maxML = matches[nbMatches-1].len; + DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u", + inr-istart, cur, nbMatches, maxML); + + if ( (maxML > sufficient_len) + || (cur + maxML >= ZSTD_OPT_NUM) ) { + lastSequence.mlen = maxML; + lastSequence.off = matches[nbMatches-1].off; + lastSequence.litlen = litlen; + cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */ + last_pos = cur + ZSTD_totalLen(lastSequence); + if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */ + goto _shortestPath; + } } + + /* set prices using matches found at position == cur */ + for (matchNb = 0; matchNb < nbMatches; matchNb++) { + U32 const offset = matches[matchNb].off; + U32 const lastML = matches[matchNb].len; + U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch; + U32 mlen; + + DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u", + matchNb, matches[matchNb].off, lastML, litlen); + + for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */ + U32 const pos = cur + mlen; + int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel); + + if ((pos > last_pos) || (price < opt[pos].price)) { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */ + opt[pos].mlen = mlen; + opt[pos].off = offset; + opt[pos].litlen = litlen; + opt[pos].price = price; + } else { + DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)", + pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price)); + if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */ + } + } } } + } /* for (cur = 1; cur <= last_pos; cur++) */ + + lastSequence = opt[last_pos]; + cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */ + assert(cur < ZSTD_OPT_NUM); /* control overflow*/ + +_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */ + assert(opt[0].mlen == 0); + + /* Set the next chunk's repcodes based on the repcodes of the beginning + * of the last match, and the last sequence. This avoids us having to + * update them while traversing the sequences. + */ + if (lastSequence.mlen != 0) { + repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0); + ZSTD_memcpy(rep, &reps, sizeof(reps)); + } else { + ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t)); + } + + { U32 const storeEnd = cur + 1; + U32 storeStart = storeEnd; + U32 seqPos = cur; + + DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)", + last_pos, cur); (void)last_pos; + assert(storeEnd < ZSTD_OPT_NUM); + DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off); + opt[storeEnd] = lastSequence; + while (seqPos > 0) { + U32 const backDist = ZSTD_totalLen(opt[seqPos]); + storeStart--; + DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)", + seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off); + opt[storeStart] = opt[seqPos]; + seqPos = (seqPos > backDist) ? seqPos - backDist : 0; + } + + /* save sequences */ + DEBUGLOG(6, "sending selected sequences into seqStore") + { U32 storePos; + for (storePos=storeStart; storePos <= storeEnd; storePos++) { + U32 const llen = opt[storePos].litlen; + U32 const mlen = opt[storePos].mlen; + U32 const offCode = opt[storePos].off; + U32 const advance = llen + mlen; + DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", + anchor - istart, (unsigned)llen, (unsigned)mlen); + + if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ + assert(storePos == storeEnd); /* must be last sequence */ + ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ + continue; /* will finish */ + } + + assert(anchor + llen <= iend); + ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen); + ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH); + anchor += advance; + ip = anchor; + } } + ZSTD_setBasePrices(optStatePtr, optLevel); + } + } /* while (ip < ilimit) */ + + /* Return the last literals size */ + return (size_t)(iend - anchor); +} + + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btopt"); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); +} + + +/* used in 2-pass strategy */ +static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) +{ + U32 s, sum=0; + assert(ZSTD_FREQ_DIV+bonus >= 0); + for (s=0; slitSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); + optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); + optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); + optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); +} + +/* ZSTD_initStats_ultra(): + * make a first compression pass, just to seed stats with more accurate starting values. + * only works on first block, with no dictionary and no ldm. + * this function cannot error, hence its contract must be respected. + */ +static void +ZSTD_initStats_ultra(ZSTD_matchState_t* ms, + seqStore_t* seqStore, + U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ + ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep)); + + DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); + assert(ms->opt.litLengthSum == 0); /* first block */ + assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ + assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ + assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ + + ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ + + /* invalidate first scan from history */ + ZSTD_resetSeqStore(seqStore); + ms->window.base -= srcSize; + ms->window.dictLimit += (U32)srcSize; + ms->window.lowLimit = ms->window.dictLimit; + ms->nextToUpdate = ms->window.dictLimit; + + /* re-inforce weight of collected statistics */ + ZSTD_upscaleStats(&ms->opt); +} + +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + U32 const curr = (U32)((const BYTE*)src - ms->window.base); + DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); + + /* 2-pass strategy: + * this strategy makes a first pass over first block to collect statistics + * and seed next round's statistics with it. + * After 1st pass, function forgets everything, and starts a new block. + * Consequently, this can only work if no data has been previously loaded in tables, + * aka, no dictionary, no prefix, no ldm preprocessing. + * The compression ratio gain is generally small (~0.5% on first block), + * the cost is 2x cpu time on first block. */ + assert(srcSize <= ZSTD_BLOCKSIZE_MAX); + if ( (ms->opt.litLengthSum==0) /* first block */ + && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ + && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ + && (curr == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ + && (srcSize > ZSTD_PREDEF_THRESHOLD) + ) { + ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); + } + + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); +} + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState); +} + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict); +} + +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + const void* src, size_t srcSize) +{ + return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); +} + +/* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ diff --git a/lib/zstd/compress/zstd_opt.h b/lib/zstd/compress/zstd_opt.h new file mode 100644 index 000000000000..22b862858ba7 --- /dev/null +++ b/lib/zstd/compress/zstd_opt.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#ifndef ZSTD_OPT_H +#define ZSTD_OPT_H + + +#include "zstd_compress_internal.h" + +/* used in ZSTD_loadDictionaryContent() */ +void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend); + +size_t ZSTD_compressBlock_btopt( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra2( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + +size_t ZSTD_compressBlock_btopt_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_dictMatchState( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + +size_t ZSTD_compressBlock_btopt_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); +size_t ZSTD_compressBlock_btultra_extDict( + ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], + void const* src, size_t srcSize); + + /* note : no btultra2 variant for extDict nor dictMatchState, + * because btultra2 is not meant to work with dictionaries + * and is only specific for the first block (no prefix) */ + + +#endif /* ZSTD_OPT_H */ diff --git a/lib/zstd/decompress.c b/lib/zstd/decompress.c deleted file mode 100644 index 66cd487a326a..000000000000 --- a/lib/zstd/decompress.c +++ /dev/null @@ -1,2531 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/* *************************************************************** -* Tuning parameters -*****************************************************************/ -/*! -* MAXWINDOWSIZE_DEFAULT : -* maximum window size accepted by DStream, by default. -* Frames requiring more memory will be rejected. -*/ -#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT -#define ZSTD_MAXWINDOWSIZE_DEFAULT ((1 << ZSTD_WINDOWLOG_MAX) + 1) /* defined within zstd.h */ -#endif - -/*-******************************************************* -* Dependencies -*********************************************************/ -#include "fse.h" -#include "huf.h" -#include "mem.h" /* low level memory routines */ -#include "zstd_internal.h" -#include -#include -#include /* memcpy, memmove, memset */ - -#define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) - -/*-************************************* -* Macros -***************************************/ -#define ZSTD_isError ERR_isError /* for inlining */ -#define FSE_isError ERR_isError -#define HUF_isError ERR_isError - -/*_******************************************************* -* Memory operations -**********************************************************/ -static void ZSTD_copy4(void *dst, const void *src) { memcpy(dst, src, 4); } - -/*-************************************************************* -* Context management -***************************************************************/ -typedef enum { - ZSTDds_getFrameHeaderSize, - ZSTDds_decodeFrameHeader, - ZSTDds_decodeBlockHeader, - ZSTDds_decompressBlock, - ZSTDds_decompressLastBlock, - ZSTDds_checkChecksum, - ZSTDds_decodeSkippableHeader, - ZSTDds_skipFrame -} ZSTD_dStage; - -typedef struct { - FSE_DTable LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)]; - FSE_DTable OFTable[FSE_DTABLE_SIZE_U32(OffFSELog)]; - FSE_DTable MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)]; - HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ - U64 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32 / 2]; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_entropyTables_t; - -struct ZSTD_DCtx_s { - const FSE_DTable *LLTptr; - const FSE_DTable *MLTptr; - const FSE_DTable *OFTptr; - const HUF_DTable *HUFptr; - ZSTD_entropyTables_t entropy; - const void *previousDstEnd; /* detect continuity */ - const void *base; /* start of curr segment */ - const void *vBase; /* virtual start of previous segment if it was just before curr one */ - const void *dictEnd; /* end of previous segment */ - size_t expected; - ZSTD_frameParams fParams; - blockType_e bType; /* used in ZSTD_decompressContinue(), to transfer blockType between header decoding and block decoding stages */ - ZSTD_dStage stage; - U32 litEntropy; - U32 fseEntropy; - struct xxh64_state xxhState; - size_t headerSize; - U32 dictID; - const BYTE *litPtr; - ZSTD_customMem customMem; - size_t litSize; - size_t rleSize; - BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; -}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ - -size_t ZSTD_DCtxWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DCtx)); } - -size_t ZSTD_decompressBegin(ZSTD_DCtx *dctx) -{ - dctx->expected = ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_getFrameHeaderSize; - dctx->previousDstEnd = NULL; - dctx->base = NULL; - dctx->vBase = NULL; - dctx->dictEnd = NULL; - dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - dctx->litEntropy = dctx->fseEntropy = 0; - dctx->dictID = 0; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); - memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ - dctx->LLTptr = dctx->entropy.LLTable; - dctx->MLTptr = dctx->entropy.MLTable; - dctx->OFTptr = dctx->entropy.OFTable; - dctx->HUFptr = dctx->entropy.hufTable; - return 0; -} - -ZSTD_DCtx *ZSTD_createDCtx_advanced(ZSTD_customMem customMem) -{ - ZSTD_DCtx *dctx; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - dctx = (ZSTD_DCtx *)ZSTD_malloc(sizeof(ZSTD_DCtx), customMem); - if (!dctx) - return NULL; - memcpy(&dctx->customMem, &customMem, sizeof(customMem)); - ZSTD_decompressBegin(dctx); - return dctx; -} - -ZSTD_DCtx *ZSTD_initDCtx(void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - return ZSTD_createDCtx_advanced(stackMem); -} - -size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx) -{ - if (dctx == NULL) - return 0; /* support free on NULL */ - ZSTD_free(dctx, dctx->customMem); - return 0; /* reserved as a potential error code in the future */ -} - -void ZSTD_copyDCtx(ZSTD_DCtx *dstDCtx, const ZSTD_DCtx *srcDCtx) -{ - size_t const workSpaceSize = (ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH) + ZSTD_frameHeaderSize_max; - memcpy(dstDCtx, srcDCtx, sizeof(ZSTD_DCtx) - workSpaceSize); /* no need to copy workspace */ -} - -static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict); - -/*-************************************************************* -* Decompression section -***************************************************************/ - -/*! ZSTD_isFrame() : - * Tells if the content of `buffer` starts with a valid Frame Identifier. - * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. - * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. - * Note 3 : Skippable Frame Identifiers are considered valid. */ -unsigned ZSTD_isFrame(const void *buffer, size_t size) -{ - if (size < 4) - return 0; - { - U32 const magic = ZSTD_readLE32(buffer); - if (magic == ZSTD_MAGICNUMBER) - return 1; - if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) - return 1; - } - return 0; -} - -/** ZSTD_frameHeaderSize() : -* srcSize must be >= ZSTD_frameHeaderSize_prefix. -* @return : size of the Frame Header */ -static size_t ZSTD_frameHeaderSize(const void *src, size_t srcSize) -{ - if (srcSize < ZSTD_frameHeaderSize_prefix) - return ERROR(srcSize_wrong); - { - BYTE const fhd = ((const BYTE *)src)[4]; - U32 const dictID = fhd & 3; - U32 const singleSegment = (fhd >> 5) & 1; - U32 const fcsId = fhd >> 6; - return ZSTD_frameHeaderSize_prefix + !singleSegment + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + (singleSegment && !fcsId); - } -} - -/** ZSTD_getFrameParams() : -* decode Frame Header, or require larger `srcSize`. -* @return : 0, `fparamsPtr` is correctly filled, -* >0, `srcSize` is too small, result is expected `srcSize`, -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_getFrameParams(ZSTD_frameParams *fparamsPtr, const void *src, size_t srcSize) -{ - const BYTE *ip = (const BYTE *)src; - - if (srcSize < ZSTD_frameHeaderSize_prefix) - return ZSTD_frameHeaderSize_prefix; - if (ZSTD_readLE32(src) != ZSTD_MAGICNUMBER) { - if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - if (srcSize < ZSTD_skippableHeaderSize) - return ZSTD_skippableHeaderSize; /* magic number + skippable frame length */ - memset(fparamsPtr, 0, sizeof(*fparamsPtr)); - fparamsPtr->frameContentSize = ZSTD_readLE32((const char *)src + 4); - fparamsPtr->windowSize = 0; /* windowSize==0 means a frame is skippable */ - return 0; - } - return ERROR(prefix_unknown); - } - - /* ensure there is enough `srcSize` to fully read/decode frame header */ - { - size_t const fhsize = ZSTD_frameHeaderSize(src, srcSize); - if (srcSize < fhsize) - return fhsize; - } - - { - BYTE const fhdByte = ip[4]; - size_t pos = 5; - U32 const dictIDSizeCode = fhdByte & 3; - U32 const checksumFlag = (fhdByte >> 2) & 1; - U32 const singleSegment = (fhdByte >> 5) & 1; - U32 const fcsID = fhdByte >> 6; - U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; - U32 windowSize = 0; - U32 dictID = 0; - U64 frameContentSize = 0; - if ((fhdByte & 0x08) != 0) - return ERROR(frameParameter_unsupported); /* reserved bits, which must be zero */ - if (!singleSegment) { - BYTE const wlByte = ip[pos++]; - U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; - if (windowLog > ZSTD_WINDOWLOG_MAX) - return ERROR(frameParameter_windowTooLarge); /* avoids issue with 1 << windowLog */ - windowSize = (1U << windowLog); - windowSize += (windowSize >> 3) * (wlByte & 7); - } - - switch (dictIDSizeCode) { - default: /* impossible */ - case 0: break; - case 1: - dictID = ip[pos]; - pos++; - break; - case 2: - dictID = ZSTD_readLE16(ip + pos); - pos += 2; - break; - case 3: - dictID = ZSTD_readLE32(ip + pos); - pos += 4; - break; - } - switch (fcsID) { - default: /* impossible */ - case 0: - if (singleSegment) - frameContentSize = ip[pos]; - break; - case 1: frameContentSize = ZSTD_readLE16(ip + pos) + 256; break; - case 2: frameContentSize = ZSTD_readLE32(ip + pos); break; - case 3: frameContentSize = ZSTD_readLE64(ip + pos); break; - } - if (!windowSize) - windowSize = (U32)frameContentSize; - if (windowSize > windowSizeMax) - return ERROR(frameParameter_windowTooLarge); - fparamsPtr->frameContentSize = frameContentSize; - fparamsPtr->windowSize = windowSize; - fparamsPtr->dictID = dictID; - fparamsPtr->checksumFlag = checksumFlag; - } - return 0; -} - -/** ZSTD_getFrameContentSize() : -* compatible with legacy mode -* @return : decompressed size of the single frame pointed to be `src` if known, otherwise -* - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined -* - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ -unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) -{ - { - ZSTD_frameParams fParams; - if (ZSTD_getFrameParams(&fParams, src, srcSize) != 0) - return ZSTD_CONTENTSIZE_ERROR; - if (fParams.windowSize == 0) { - /* Either skippable or empty frame, size == 0 either way */ - return 0; - } else if (fParams.frameContentSize != 0) { - return fParams.frameContentSize; - } else { - return ZSTD_CONTENTSIZE_UNKNOWN; - } - } -} - -/** ZSTD_findDecompressedSize() : - * compatible with legacy mode - * `srcSize` must be the exact length of some number of ZSTD compressed and/or - * skippable frames - * @return : decompressed size of the frames contained */ -unsigned long long ZSTD_findDecompressedSize(const void *src, size_t srcSize) -{ - { - unsigned long long totalDstSize = 0; - while (srcSize >= ZSTD_frameHeaderSize_prefix) { - const U32 magicNumber = ZSTD_readLE32(src); - - if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t skippableSize; - if (srcSize < ZSTD_skippableHeaderSize) - return ERROR(srcSize_wrong); - skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; - if (srcSize < skippableSize) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } - - { - unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); - if (ret >= ZSTD_CONTENTSIZE_ERROR) - return ret; - - /* check for overflow */ - if (totalDstSize + ret < totalDstSize) - return ZSTD_CONTENTSIZE_ERROR; - totalDstSize += ret; - } - { - size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); - if (ZSTD_isError(frameSrcSize)) { - return ZSTD_CONTENTSIZE_ERROR; - } - - src = (const BYTE *)src + frameSrcSize; - srcSize -= frameSrcSize; - } - } - - if (srcSize) { - return ZSTD_CONTENTSIZE_ERROR; - } - - return totalDstSize; - } -} - -/** ZSTD_decodeFrameHeader() : -* `headerSize` must be the size provided by ZSTD_frameHeaderSize(). -* @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ -static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx *dctx, const void *src, size_t headerSize) -{ - size_t const result = ZSTD_getFrameParams(&(dctx->fParams), src, headerSize); - if (ZSTD_isError(result)) - return result; /* invalid header */ - if (result > 0) - return ERROR(srcSize_wrong); /* headerSize too small */ - if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) - return ERROR(dictionary_wrong); - if (dctx->fParams.checksumFlag) - xxh64_reset(&dctx->xxhState, 0); - return 0; -} - -typedef struct { - blockType_e blockType; - U32 lastBlock; - U32 origSize; -} blockProperties_t; - -/*! ZSTD_getcBlockSize() : -* Provides the size of compressed block from block header `src` */ -size_t ZSTD_getcBlockSize(const void *src, size_t srcSize, blockProperties_t *bpPtr) -{ - if (srcSize < ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - { - U32 const cBlockHeader = ZSTD_readLE24(src); - U32 const cSize = cBlockHeader >> 3; - bpPtr->lastBlock = cBlockHeader & 1; - bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); - bpPtr->origSize = cSize; /* only useful for RLE */ - if (bpPtr->blockType == bt_rle) - return 1; - if (bpPtr->blockType == bt_reserved) - return ERROR(corruption_detected); - return cSize; - } -} - -static size_t ZSTD_copyRawBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - if (srcSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memcpy(dst, src, srcSize); - return srcSize; -} - -static size_t ZSTD_setRleBlock(void *dst, size_t dstCapacity, const void *src, size_t srcSize, size_t regenSize) -{ - if (srcSize != 1) - return ERROR(srcSize_wrong); - if (regenSize > dstCapacity) - return ERROR(dstSize_tooSmall); - memset(dst, *(const BYTE *)src, regenSize); - return regenSize; -} - -/*! ZSTD_decodeLiteralsBlock() : - @return : nb of bytes read from src (< srcSize ) */ -size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx *dctx, const void *src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ -{ - if (srcSize < MIN_CBLOCK_SIZE) - return ERROR(corruption_detected); - - { - const BYTE *const istart = (const BYTE *)src; - symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); - - switch (litEncType) { - case set_repeat: - if (dctx->litEntropy == 0) - return ERROR(dictionary_corrupted); - fallthrough; - case set_compressed: - if (srcSize < 5) - return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */ - { - size_t lhSize, litSize, litCSize; - U32 singleStream = 0; - U32 const lhlCode = (istart[0] >> 2) & 3; - U32 const lhc = ZSTD_readLE32(istart); - switch (lhlCode) { - case 0: - case 1: - default: /* note : default is impossible, since lhlCode into [0..3] */ - /* 2 - 2 - 10 - 10 */ - singleStream = !lhlCode; - lhSize = 3; - litSize = (lhc >> 4) & 0x3FF; - litCSize = (lhc >> 14) & 0x3FF; - break; - case 2: - /* 2 - 2 - 14 - 14 */ - lhSize = 4; - litSize = (lhc >> 4) & 0x3FFF; - litCSize = lhc >> 18; - break; - case 3: - /* 2 - 2 - 18 - 18 */ - lhSize = 5; - litSize = (lhc >> 4) & 0x3FFFF; - litCSize = (lhc >> 22) + (istart[4] << 10); - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(corruption_detected); - if (litCSize + lhSize > srcSize) - return ERROR(corruption_detected); - - if (HUF_isError( - (litEncType == set_repeat) - ? (singleStream ? HUF_decompress1X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr) - : HUF_decompress4X_usingDTable(dctx->litBuffer, litSize, istart + lhSize, litCSize, dctx->HUFptr)) - : (singleStream - ? HUF_decompress1X2_DCtx_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace)) - : HUF_decompress4X_hufOnly_wksp(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart + lhSize, litCSize, - dctx->entropy.workspace, sizeof(dctx->entropy.workspace))))) - return ERROR(corruption_detected); - - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - dctx->litEntropy = 1; - if (litEncType == set_compressed) - dctx->HUFptr = dctx->entropy.hufTable; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return litCSize + lhSize; - } - - case set_basic: { - size_t litSize, lhSize; - U32 const lhlCode = ((istart[0]) >> 2) & 3; - switch (lhlCode) { - case 0: - case 2: - default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = ZSTD_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = ZSTD_readLE24(istart) >> 4; - break; - } - - if (lhSize + litSize + WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ - if (litSize + lhSize > srcSize) - return ERROR(corruption_detected); - memcpy(dctx->litBuffer, istart + lhSize, litSize); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); - return lhSize + litSize; - } - /* direct reference into compressed stream */ - dctx->litPtr = istart + lhSize; - dctx->litSize = litSize; - return lhSize + litSize; - } - - case set_rle: { - U32 const lhlCode = ((istart[0]) >> 2) & 3; - size_t litSize, lhSize; - switch (lhlCode) { - case 0: - case 2: - default: /* note : default is impossible, since lhlCode into [0..3] */ - lhSize = 1; - litSize = istart[0] >> 3; - break; - case 1: - lhSize = 2; - litSize = ZSTD_readLE16(istart) >> 4; - break; - case 3: - lhSize = 3; - litSize = ZSTD_readLE24(istart) >> 4; - if (srcSize < 4) - return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4 */ - break; - } - if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(corruption_detected); - memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); - dctx->litPtr = dctx->litBuffer; - dctx->litSize = litSize; - return lhSize + 1; - } - default: - return ERROR(corruption_detected); /* impossible */ - } - } -} - -typedef union { - FSE_decode_t realData; - U32 alignedBy4; -} FSE_decode_t4; - -static const FSE_decode_t4 LL_defaultDTable[(1 << LL_DEFAULTNORMLOG) + 1] = { - {{LL_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 4}}, /* 0 : base, symbol, bits */ - {{16, 0, 4}}, - {{32, 1, 5}}, - {{0, 3, 5}}, - {{0, 4, 5}}, - {{0, 6, 5}}, - {{0, 7, 5}}, - {{0, 9, 5}}, - {{0, 10, 5}}, - {{0, 12, 5}}, - {{0, 14, 6}}, - {{0, 16, 5}}, - {{0, 18, 5}}, - {{0, 19, 5}}, - {{0, 21, 5}}, - {{0, 22, 5}}, - {{0, 24, 5}}, - {{32, 25, 5}}, - {{0, 26, 5}}, - {{0, 27, 6}}, - {{0, 29, 6}}, - {{0, 31, 6}}, - {{32, 0, 4}}, - {{0, 1, 4}}, - {{0, 2, 5}}, - {{32, 4, 5}}, - {{0, 5, 5}}, - {{32, 7, 5}}, - {{0, 8, 5}}, - {{32, 10, 5}}, - {{0, 11, 5}}, - {{0, 13, 6}}, - {{32, 16, 5}}, - {{0, 17, 5}}, - {{32, 19, 5}}, - {{0, 20, 5}}, - {{32, 22, 5}}, - {{0, 23, 5}}, - {{0, 25, 4}}, - {{16, 25, 4}}, - {{32, 26, 5}}, - {{0, 28, 6}}, - {{0, 30, 6}}, - {{48, 0, 4}}, - {{16, 1, 4}}, - {{32, 2, 5}}, - {{32, 3, 5}}, - {{32, 5, 5}}, - {{32, 6, 5}}, - {{32, 8, 5}}, - {{32, 9, 5}}, - {{32, 11, 5}}, - {{32, 12, 5}}, - {{0, 15, 6}}, - {{32, 17, 5}}, - {{32, 18, 5}}, - {{32, 20, 5}}, - {{32, 21, 5}}, - {{32, 23, 5}}, - {{32, 24, 5}}, - {{0, 35, 6}}, - {{0, 34, 6}}, - {{0, 33, 6}}, - {{0, 32, 6}}, -}; /* LL_defaultDTable */ - -static const FSE_decode_t4 ML_defaultDTable[(1 << ML_DEFAULTNORMLOG) + 1] = { - {{ML_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 6}}, /* 0 : base, symbol, bits */ - {{0, 1, 4}}, - {{32, 2, 5}}, - {{0, 3, 5}}, - {{0, 5, 5}}, - {{0, 6, 5}}, - {{0, 8, 5}}, - {{0, 10, 6}}, - {{0, 13, 6}}, - {{0, 16, 6}}, - {{0, 19, 6}}, - {{0, 22, 6}}, - {{0, 25, 6}}, - {{0, 28, 6}}, - {{0, 31, 6}}, - {{0, 33, 6}}, - {{0, 35, 6}}, - {{0, 37, 6}}, - {{0, 39, 6}}, - {{0, 41, 6}}, - {{0, 43, 6}}, - {{0, 45, 6}}, - {{16, 1, 4}}, - {{0, 2, 4}}, - {{32, 3, 5}}, - {{0, 4, 5}}, - {{32, 6, 5}}, - {{0, 7, 5}}, - {{0, 9, 6}}, - {{0, 12, 6}}, - {{0, 15, 6}}, - {{0, 18, 6}}, - {{0, 21, 6}}, - {{0, 24, 6}}, - {{0, 27, 6}}, - {{0, 30, 6}}, - {{0, 32, 6}}, - {{0, 34, 6}}, - {{0, 36, 6}}, - {{0, 38, 6}}, - {{0, 40, 6}}, - {{0, 42, 6}}, - {{0, 44, 6}}, - {{32, 1, 4}}, - {{48, 1, 4}}, - {{16, 2, 4}}, - {{32, 4, 5}}, - {{32, 5, 5}}, - {{32, 7, 5}}, - {{32, 8, 5}}, - {{0, 11, 6}}, - {{0, 14, 6}}, - {{0, 17, 6}}, - {{0, 20, 6}}, - {{0, 23, 6}}, - {{0, 26, 6}}, - {{0, 29, 6}}, - {{0, 52, 6}}, - {{0, 51, 6}}, - {{0, 50, 6}}, - {{0, 49, 6}}, - {{0, 48, 6}}, - {{0, 47, 6}}, - {{0, 46, 6}}, -}; /* ML_defaultDTable */ - -static const FSE_decode_t4 OF_defaultDTable[(1 << OF_DEFAULTNORMLOG) + 1] = { - {{OF_DEFAULTNORMLOG, 1, 1}}, /* header : tableLog, fastMode, fastMode */ - {{0, 0, 5}}, /* 0 : base, symbol, bits */ - {{0, 6, 4}}, - {{0, 9, 5}}, - {{0, 15, 5}}, - {{0, 21, 5}}, - {{0, 3, 5}}, - {{0, 7, 4}}, - {{0, 12, 5}}, - {{0, 18, 5}}, - {{0, 23, 5}}, - {{0, 5, 5}}, - {{0, 8, 4}}, - {{0, 14, 5}}, - {{0, 20, 5}}, - {{0, 2, 5}}, - {{16, 7, 4}}, - {{0, 11, 5}}, - {{0, 17, 5}}, - {{0, 22, 5}}, - {{0, 4, 5}}, - {{16, 8, 4}}, - {{0, 13, 5}}, - {{0, 19, 5}}, - {{0, 1, 5}}, - {{16, 6, 4}}, - {{0, 10, 5}}, - {{0, 16, 5}}, - {{0, 28, 5}}, - {{0, 27, 5}}, - {{0, 26, 5}}, - {{0, 25, 5}}, - {{0, 24, 5}}, -}; /* OF_defaultDTable */ - -/*! ZSTD_buildSeqTable() : - @return : nb bytes read from src, - or an error code if it fails, testable with ZSTD_isError() -*/ -static size_t ZSTD_buildSeqTable(FSE_DTable *DTableSpace, const FSE_DTable **DTablePtr, symbolEncodingType_e type, U32 max, U32 maxLog, const void *src, - size_t srcSize, const FSE_decode_t4 *defaultTable, U32 flagRepeatTable, void *workspace, size_t workspaceSize) -{ - const void *const tmpPtr = defaultTable; /* bypass strict aliasing */ - switch (type) { - case set_rle: - if (!srcSize) - return ERROR(srcSize_wrong); - if ((*(const BYTE *)src) > max) - return ERROR(corruption_detected); - FSE_buildDTable_rle(DTableSpace, *(const BYTE *)src); - *DTablePtr = DTableSpace; - return 1; - case set_basic: *DTablePtr = (const FSE_DTable *)tmpPtr; return 0; - case set_repeat: - if (!flagRepeatTable) - return ERROR(corruption_detected); - return 0; - default: /* impossible */ - case set_compressed: { - U32 tableLog; - S16 *norm = (S16 *)workspace; - size_t const spaceUsed32 = ALIGN(sizeof(S16) * (MaxSeq + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(GENERIC); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - { - size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); - if (FSE_isError(headerSize)) - return ERROR(corruption_detected); - if (tableLog > maxLog) - return ERROR(corruption_detected); - FSE_buildDTable_wksp(DTableSpace, norm, max, tableLog, workspace, workspaceSize); - *DTablePtr = DTableSpace; - return headerSize; - } - } - } -} - -size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx *dctx, int *nbSeqPtr, const void *src, size_t srcSize) -{ - const BYTE *const istart = (const BYTE *const)src; - const BYTE *const iend = istart + srcSize; - const BYTE *ip = istart; - - /* check */ - if (srcSize < MIN_SEQUENCES_SIZE) - return ERROR(srcSize_wrong); - - /* SeqHead */ - { - int nbSeq = *ip++; - if (!nbSeq) { - *nbSeqPtr = 0; - return 1; - } - if (nbSeq > 0x7F) { - if (nbSeq == 0xFF) { - if (ip + 2 > iend) - return ERROR(srcSize_wrong); - nbSeq = ZSTD_readLE16(ip) + LONGNBSEQ, ip += 2; - } else { - if (ip >= iend) - return ERROR(srcSize_wrong); - nbSeq = ((nbSeq - 0x80) << 8) + *ip++; - } - } - *nbSeqPtr = nbSeq; - } - - /* FSE table descriptors */ - if (ip + 4 > iend) - return ERROR(srcSize_wrong); /* minimum possible size */ - { - symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); - symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); - symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); - ip++; - - /* Build DTables */ - { - size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, LLtype, MaxLL, LLFSELog, ip, iend - ip, - LL_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(llhSize)) - return ERROR(corruption_detected); - ip += llhSize; - } - { - size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, OFtype, MaxOff, OffFSELog, ip, iend - ip, - OF_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(ofhSize)) - return ERROR(corruption_detected); - ip += ofhSize; - } - { - size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, MLtype, MaxML, MLFSELog, ip, iend - ip, - ML_defaultDTable, dctx->fseEntropy, dctx->entropy.workspace, sizeof(dctx->entropy.workspace)); - if (ZSTD_isError(mlhSize)) - return ERROR(corruption_detected); - ip += mlhSize; - } - } - - return ip - istart; -} - -typedef struct { - size_t litLength; - size_t matchLength; - size_t offset; - const BYTE *match; -} seq_t; - -typedef struct { - BIT_DStream_t DStream; - FSE_DState_t stateLL; - FSE_DState_t stateOffb; - FSE_DState_t stateML; - size_t prevOffset[ZSTD_REP_NUM]; - const BYTE *base; - size_t pos; - uPtrDiff gotoDict; -} seqState_t; - -FORCE_NOINLINE -size_t ZSTD_execSequenceLast7(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd <= oend_w) - return ERROR(GENERIC); /* Precondition */ - - /* copy literals */ - if (op < oend_w) { - ZSTD_wildcopy(op, *litPtr, oend_w - op); - *litPtr += oend_w - op; - op = oend_w; - } - while (op < oLitEnd) - *op++ = *(*litPtr)++; - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - match = dictEnd - (base - match); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - } - } - while (op < oMatchEnd) - *op++ = *match++; - return sequenceLength; -} - -static seq_t ZSTD_decodeSequence(seqState_t *seqState) -{ - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits + mlBits + ofBits; - - static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, - 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; - - static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, - 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; - - static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, - 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, - 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; - - /* sequence */ - { - size_t offset; - if (!ofCode) - offset = 0; - else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); - } - - if (ofCode <= 1) { - offset += (llCode == 0); - if (offset) { - size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() && (mlBits + llBits > 24)) - BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - seq.match = NULL; - - return seq; -} - -FORCE_INLINE -size_t ZSTD_execSequence(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = oLitEnd - sequence.offset; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd > oend_w) - return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); - - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op + 8, (*litPtr) + 8, - sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - match = dictEnd + (match - base); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) - op[i] = match[i]; - return sequenceLength; - } - } - } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op + 4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; - match += 8; - - if (oMatchEnd > oend - (16 - MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) - *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - -static size_t ZSTD_decompressSequences(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -{ - const BYTE *ip = (const BYTE *)seqStart; - const BYTE *const iend = ip + seqSize; - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + maxDstSize; - BYTE *op = ostart; - const BYTE *litPtr = dctx->litPtr; - const BYTE *const litEnd = litPtr + dctx->litSize; - const BYTE *const base = (const BYTE *)(dctx->base); - const BYTE *const vBase = (const BYTE *)(dctx->vBase); - const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); - int nbSeq; - - /* Build Decoding Tables */ - { - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) - return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { - seqState_t seqState; - dctx->fseEntropy = 1; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - seqState.prevOffset[i] = dctx->entropy.rep[i]; - } - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq;) { - nbSeq--; - { - seq_t const sequence = ZSTD_decodeSequence(&seqState); - size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - op += oneSeqSize; - } - } - - /* check if reached exact end */ - if (nbSeq) - return ERROR(corruption_detected); - /* save reps for next block */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); - } - } - - /* last literal segment */ - { - size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend - op)) - return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op - ostart; -} - -FORCE_INLINE seq_t ZSTD_decodeSequenceLong_generic(seqState_t *seqState, int const longOffsets) -{ - seq_t seq; - - U32 const llCode = FSE_peekSymbol(&seqState->stateLL); - U32 const mlCode = FSE_peekSymbol(&seqState->stateML); - U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ - - U32 const llBits = LL_bits[llCode]; - U32 const mlBits = ML_bits[mlCode]; - U32 const ofBits = ofCode; - U32 const totalBits = llBits + mlBits + ofBits; - - static const U32 LL_base[MaxLL + 1] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, - 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000, 0x10000}; - - static const U32 ML_base[MaxML + 1] = {3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, - 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 37, 39, 41, - 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, 0x1003, 0x2003, 0x4003, 0x8003, 0x10003}; - - static const U32 OF_base[MaxOff + 1] = {0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, 0xFD, 0x1FD, - 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, - 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD}; - - /* sequence */ - { - size_t offset; - if (!ofCode) - offset = 0; - else { - if (longOffsets) { - int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN); - offset = OF_base[ofCode] + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); - if (ZSTD_32bits() || extraBits) - BIT_reloadDStream(&seqState->DStream); - if (extraBits) - offset += BIT_readBitsFast(&seqState->DStream, extraBits); - } else { - offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); - } - } - - if (ofCode <= 1) { - offset += (llCode == 0); - if (offset) { - size_t temp = (offset == 3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; - temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ - if (offset != 1) - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset = temp; - } else { - offset = seqState->prevOffset[0]; - } - } else { - seqState->prevOffset[2] = seqState->prevOffset[1]; - seqState->prevOffset[1] = seqState->prevOffset[0]; - seqState->prevOffset[0] = offset; - } - seq.offset = offset; - } - - seq.matchLength = ML_base[mlCode] + ((mlCode > 31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() && (mlBits + llBits > 24)) - BIT_reloadDStream(&seqState->DStream); - - seq.litLength = LL_base[llCode] + ((llCode > 15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ - if (ZSTD_32bits() || (totalBits > 64 - 7 - (LLFSELog + MLFSELog + OffFSELog))) - BIT_reloadDStream(&seqState->DStream); - - { - size_t const pos = seqState->pos + seq.litLength; - seq.match = seqState->base + pos - seq.offset; /* single memory segment */ - if (seq.offset > pos) - seq.match += seqState->gotoDict; /* separate memory segment */ - seqState->pos = pos + seq.matchLength; - } - - /* ANS state update */ - FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ - FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ - if (ZSTD_32bits()) - BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ - FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ - - return seq; -} - -static seq_t ZSTD_decodeSequenceLong(seqState_t *seqState, unsigned const windowSize) -{ - if (ZSTD_highbit32(windowSize) > STREAM_ACCUMULATOR_MIN) { - return ZSTD_decodeSequenceLong_generic(seqState, 1); - } else { - return ZSTD_decodeSequenceLong_generic(seqState, 0); - } -} - -FORCE_INLINE -size_t ZSTD_execSequenceLong(BYTE *op, BYTE *const oend, seq_t sequence, const BYTE **litPtr, const BYTE *const litLimit, const BYTE *const base, - const BYTE *const vBase, const BYTE *const dictEnd) -{ - BYTE *const oLitEnd = op + sequence.litLength; - size_t const sequenceLength = sequence.litLength + sequence.matchLength; - BYTE *const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ - BYTE *const oend_w = oend - WILDCOPY_OVERLENGTH; - const BYTE *const iLitEnd = *litPtr + sequence.litLength; - const BYTE *match = sequence.match; - - /* check */ - if (oMatchEnd > oend) - return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ - if (iLitEnd > litLimit) - return ERROR(corruption_detected); /* over-read beyond lit buffer */ - if (oLitEnd > oend_w) - return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); - - /* copy Literals */ - ZSTD_copy8(op, *litPtr); - if (sequence.litLength > 8) - ZSTD_wildcopy(op + 8, (*litPtr) + 8, - sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ - op = oLitEnd; - *litPtr = iLitEnd; /* update for next sequence */ - - /* copy Match */ - if (sequence.offset > (size_t)(oLitEnd - base)) { - /* offset beyond prefix */ - if (sequence.offset > (size_t)(oLitEnd - vBase)) - return ERROR(corruption_detected); - if (match + sequence.matchLength <= dictEnd) { - memmove(oLitEnd, match, sequence.matchLength); - return sequenceLength; - } - /* span extDict & currPrefixSegment */ - { - size_t const length1 = dictEnd - match; - memmove(oLitEnd, match, length1); - op = oLitEnd + length1; - sequence.matchLength -= length1; - match = base; - if (op > oend_w || sequence.matchLength < MINMATCH) { - U32 i; - for (i = 0; i < sequence.matchLength; ++i) - op[i] = match[i]; - return sequenceLength; - } - } - } - /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ - - /* match within prefix */ - if (sequence.offset < 8) { - /* close range match, overlap */ - static const U32 dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */ - static const int dec64table[] = {8, 8, 8, 7, 8, 9, 10, 11}; /* subtracted */ - int const sub2 = dec64table[sequence.offset]; - op[0] = match[0]; - op[1] = match[1]; - op[2] = match[2]; - op[3] = match[3]; - match += dec32table[sequence.offset]; - ZSTD_copy4(op + 4, match); - match -= sub2; - } else { - ZSTD_copy8(op, match); - } - op += 8; - match += 8; - - if (oMatchEnd > oend - (16 - MINMATCH)) { - if (op < oend_w) { - ZSTD_wildcopy(op, match, oend_w - op); - match += oend_w - op; - op = oend_w; - } - while (op < oMatchEnd) - *op++ = *match++; - } else { - ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8); /* works even if matchLength < 8 */ - } - return sequenceLength; -} - -static size_t ZSTD_decompressSequencesLong(ZSTD_DCtx *dctx, void *dst, size_t maxDstSize, const void *seqStart, size_t seqSize) -{ - const BYTE *ip = (const BYTE *)seqStart; - const BYTE *const iend = ip + seqSize; - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + maxDstSize; - BYTE *op = ostart; - const BYTE *litPtr = dctx->litPtr; - const BYTE *const litEnd = litPtr + dctx->litSize; - const BYTE *const base = (const BYTE *)(dctx->base); - const BYTE *const vBase = (const BYTE *)(dctx->vBase); - const BYTE *const dictEnd = (const BYTE *)(dctx->dictEnd); - unsigned const windowSize = dctx->fParams.windowSize; - int nbSeq; - - /* Build Decoding Tables */ - { - size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); - if (ZSTD_isError(seqHSize)) - return seqHSize; - ip += seqHSize; - } - - /* Regen sequences */ - if (nbSeq) { -#define STORED_SEQS 4 -#define STOSEQ_MASK (STORED_SEQS - 1) -#define ADVANCED_SEQS 4 - seq_t *sequences = (seq_t *)dctx->entropy.workspace; - int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); - seqState_t seqState; - int seqNb; - ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.workspace) >= sizeof(seq_t) * STORED_SEQS); - dctx->fseEntropy = 1; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - seqState.prevOffset[i] = dctx->entropy.rep[i]; - } - seqState.base = base; - seqState.pos = (size_t)(op - base); - seqState.gotoDict = (uPtrDiff)dictEnd - (uPtrDiff)base; /* cast to avoid undefined behaviour */ - CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend - ip), corruption_detected); - FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); - FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); - FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); - - /* prepare in advance */ - for (seqNb = 0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb < seqAdvance; seqNb++) { - sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, windowSize); - } - if (seqNb < seqAdvance) - return ERROR(corruption_detected); - - /* decode and decompress */ - for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb < nbSeq; seqNb++) { - seq_t const sequence = ZSTD_decodeSequenceLong(&seqState, windowSize); - size_t const oneSeqSize = - ZSTD_execSequenceLong(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - ZSTD_PREFETCH(sequence.match); - sequences[seqNb & STOSEQ_MASK] = sequence; - op += oneSeqSize; - } - if (seqNb < nbSeq) - return ERROR(corruption_detected); - - /* finish queue */ - seqNb -= seqAdvance; - for (; seqNb < nbSeq; seqNb++) { - size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); - if (ZSTD_isError(oneSeqSize)) - return oneSeqSize; - op += oneSeqSize; - } - - /* save reps for next block */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); - } - } - - /* last literal segment */ - { - size_t const lastLLSize = litEnd - litPtr; - if (lastLLSize > (size_t)(oend - op)) - return ERROR(dstSize_tooSmall); - memcpy(op, litPtr, lastLLSize); - op += lastLLSize; - } - - return op - ostart; -} - -static size_t ZSTD_decompressBlock_internal(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ /* blockType == blockCompressed */ - const BYTE *ip = (const BYTE *)src; - - if (srcSize >= ZSTD_BLOCKSIZE_ABSOLUTEMAX) - return ERROR(srcSize_wrong); - - /* Decode literals section */ - { - size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); - if (ZSTD_isError(litCSize)) - return litCSize; - ip += litCSize; - srcSize -= litCSize; - } - if (sizeof(size_t) > 4) /* do not enable prefetching on 32-bits x86, as it's performance detrimental */ - /* likely because of register pressure */ - /* if that's the correct cause, then 32-bits ARM should be affected differently */ - /* it would be good to test this on ARM real hardware, to see if prefetch version improves speed */ - if (dctx->fParams.windowSize > (1 << 23)) - return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); - return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); -} - -static void ZSTD_checkContinuity(ZSTD_DCtx *dctx, const void *dst) -{ - if (dst != dctx->previousDstEnd) { /* not contiguous */ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char *)dst - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); - dctx->base = dst; - dctx->previousDstEnd = dst; - } -} - -size_t ZSTD_decompressBlock(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t dSize; - ZSTD_checkContinuity(dctx, dst); - dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); - dctx->previousDstEnd = (char *)dst + dSize; - return dSize; -} - -/** ZSTD_insertBlock() : - insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ -size_t ZSTD_insertBlock(ZSTD_DCtx *dctx, const void *blockStart, size_t blockSize) -{ - ZSTD_checkContinuity(dctx, blockStart); - dctx->previousDstEnd = (const char *)blockStart + blockSize; - return blockSize; -} - -size_t ZSTD_generateNxBytes(void *dst, size_t dstCapacity, BYTE byte, size_t length) -{ - if (length > dstCapacity) - return ERROR(dstSize_tooSmall); - memset(dst, byte, length); - return length; -} - -/** ZSTD_findFrameCompressedSize() : - * compatible with legacy mode - * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame - * `srcSize` must be at least as large as the frame contained - * @return : the compressed size of the frame starting at `src` */ -size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) -{ - if (srcSize >= ZSTD_skippableHeaderSize && (ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - return ZSTD_skippableHeaderSize + ZSTD_readLE32((const BYTE *)src + 4); - } else { - const BYTE *ip = (const BYTE *)src; - const BYTE *const ipstart = ip; - size_t remainingSize = srcSize; - ZSTD_frameParams fParams; - - size_t const headerSize = ZSTD_frameHeaderSize(ip, remainingSize); - if (ZSTD_isError(headerSize)) - return headerSize; - - /* Frame Header */ - { - size_t const ret = ZSTD_getFrameParams(&fParams, ip, remainingSize); - if (ZSTD_isError(ret)) - return ret; - if (ret > 0) - return ERROR(srcSize_wrong); - } - - ip += headerSize; - remainingSize -= headerSize; - - /* Loop on each block */ - while (1) { - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - - if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) - return ERROR(srcSize_wrong); - - ip += ZSTD_blockHeaderSize + cBlockSize; - remainingSize -= ZSTD_blockHeaderSize + cBlockSize; - - if (blockProperties.lastBlock) - break; - } - - if (fParams.checksumFlag) { /* Frame content checksum */ - if (remainingSize < 4) - return ERROR(srcSize_wrong); - ip += 4; - remainingSize -= 4; - } - - return ip - ipstart; - } -} - -/*! ZSTD_decompressFrame() : -* @dctx must be properly initialized */ -static size_t ZSTD_decompressFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void **srcPtr, size_t *srcSizePtr) -{ - const BYTE *ip = (const BYTE *)(*srcPtr); - BYTE *const ostart = (BYTE * const)dst; - BYTE *const oend = ostart + dstCapacity; - BYTE *op = ostart; - size_t remainingSize = *srcSizePtr; - - /* check */ - if (remainingSize < ZSTD_frameHeaderSize_min + ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - - /* Frame Header */ - { - size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_frameHeaderSize_prefix); - if (ZSTD_isError(frameHeaderSize)) - return frameHeaderSize; - if (remainingSize < frameHeaderSize + ZSTD_blockHeaderSize) - return ERROR(srcSize_wrong); - CHECK_F(ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize)); - ip += frameHeaderSize; - remainingSize -= frameHeaderSize; - } - - /* Loop on each block */ - while (1) { - size_t decodedSize; - blockProperties_t blockProperties; - size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - - ip += ZSTD_blockHeaderSize; - remainingSize -= ZSTD_blockHeaderSize; - if (cBlockSize > remainingSize) - return ERROR(srcSize_wrong); - - switch (blockProperties.blockType) { - case bt_compressed: decodedSize = ZSTD_decompressBlock_internal(dctx, op, oend - op, ip, cBlockSize); break; - case bt_raw: decodedSize = ZSTD_copyRawBlock(op, oend - op, ip, cBlockSize); break; - case bt_rle: decodedSize = ZSTD_generateNxBytes(op, oend - op, *ip, blockProperties.origSize); break; - case bt_reserved: - default: return ERROR(corruption_detected); - } - - if (ZSTD_isError(decodedSize)) - return decodedSize; - if (dctx->fParams.checksumFlag) - xxh64_update(&dctx->xxhState, op, decodedSize); - op += decodedSize; - ip += cBlockSize; - remainingSize -= cBlockSize; - if (blockProperties.lastBlock) - break; - } - - if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ - U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); - U32 checkRead; - if (remainingSize < 4) - return ERROR(checksum_wrong); - checkRead = ZSTD_readLE32(ip); - if (checkRead != checkCalc) - return ERROR(checksum_wrong); - ip += 4; - remainingSize -= 4; - } - - /* Allow caller to get size read */ - *srcPtr = ip; - *srcSizePtr = remainingSize; - return op - ostart; -} - -static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict); -static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict); - -static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize, - const ZSTD_DDict *ddict) -{ - void *const dststart = dst; - - if (ddict) { - if (dict) { - /* programmer error, these two cases should be mutually exclusive */ - return ERROR(GENERIC); - } - - dict = ZSTD_DDictDictContent(ddict); - dictSize = ZSTD_DDictDictSize(ddict); - } - - while (srcSize >= ZSTD_frameHeaderSize_prefix) { - U32 magicNumber; - - magicNumber = ZSTD_readLE32(src); - if (magicNumber != ZSTD_MAGICNUMBER) { - if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { - size_t skippableSize; - if (srcSize < ZSTD_skippableHeaderSize) - return ERROR(srcSize_wrong); - skippableSize = ZSTD_readLE32((const BYTE *)src + 4) + ZSTD_skippableHeaderSize; - if (srcSize < skippableSize) { - return ERROR(srcSize_wrong); - } - - src = (const BYTE *)src + skippableSize; - srcSize -= skippableSize; - continue; - } else { - return ERROR(prefix_unknown); - } - } - - if (ddict) { - /* we were called from ZSTD_decompress_usingDDict */ - ZSTD_refDDict(dctx, ddict); - } else { - /* this will initialize correctly with no dict if dict == NULL, so - * use this in all cases but ddict */ - CHECK_F(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); - } - ZSTD_checkContinuity(dctx, dst); - - { - const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, &src, &srcSize); - if (ZSTD_isError(res)) - return res; - /* don't need to bounds check this, ZSTD_decompressFrame will have - * already */ - dst = (BYTE *)dst + res; - dstCapacity -= res; - } - } - - if (srcSize) - return ERROR(srcSize_wrong); /* input not entirely consumed */ - - return (BYTE *)dst - (BYTE *)dststart; -} - -size_t ZSTD_decompress_usingDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const void *dict, size_t dictSize) -{ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); -} - -size_t ZSTD_decompressDCtx(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, NULL, 0); -} - -/*-************************************** -* Advanced Streaming Decompression API -* Bufferless and synchronous -****************************************/ -size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx *dctx) { return dctx->expected; } - -ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx *dctx) -{ - switch (dctx->stage) { - default: /* should not happen */ - case ZSTDds_getFrameHeaderSize: - case ZSTDds_decodeFrameHeader: return ZSTDnit_frameHeader; - case ZSTDds_decodeBlockHeader: return ZSTDnit_blockHeader; - case ZSTDds_decompressBlock: return ZSTDnit_block; - case ZSTDds_decompressLastBlock: return ZSTDnit_lastBlock; - case ZSTDds_checkChecksum: return ZSTDnit_checksum; - case ZSTDds_decodeSkippableHeader: - case ZSTDds_skipFrame: return ZSTDnit_skippableFrame; - } -} - -int ZSTD_isSkipFrame(ZSTD_DCtx *dctx) { return dctx->stage == ZSTDds_skipFrame; } /* for zbuff */ - -/** ZSTD_decompressContinue() : -* @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) -* or an error code, which can be tested using ZSTD_isError() */ -size_t ZSTD_decompressContinue(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - /* Sanity check */ - if (srcSize != dctx->expected) - return ERROR(srcSize_wrong); - if (dstCapacity) - ZSTD_checkContinuity(dctx, dst); - - switch (dctx->stage) { - case ZSTDds_getFrameHeaderSize: - if (srcSize != ZSTD_frameHeaderSize_prefix) - return ERROR(srcSize_wrong); /* impossible */ - if ((ZSTD_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - dctx->expected = ZSTD_skippableHeaderSize - ZSTD_frameHeaderSize_prefix; /* magic number + skippable frame length */ - dctx->stage = ZSTDds_decodeSkippableHeader; - return 0; - } - dctx->headerSize = ZSTD_frameHeaderSize(src, ZSTD_frameHeaderSize_prefix); - if (ZSTD_isError(dctx->headerSize)) - return dctx->headerSize; - memcpy(dctx->headerBuffer, src, ZSTD_frameHeaderSize_prefix); - if (dctx->headerSize > ZSTD_frameHeaderSize_prefix) { - dctx->expected = dctx->headerSize - ZSTD_frameHeaderSize_prefix; - dctx->stage = ZSTDds_decodeFrameHeader; - return 0; - } - dctx->expected = 0; /* not necessary to copy more */ - fallthrough; - - case ZSTDds_decodeFrameHeader: - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - CHECK_F(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); - dctx->expected = ZSTD_blockHeaderSize; - dctx->stage = ZSTDds_decodeBlockHeader; - return 0; - - case ZSTDds_decodeBlockHeader: { - blockProperties_t bp; - size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); - if (ZSTD_isError(cBlockSize)) - return cBlockSize; - dctx->expected = cBlockSize; - dctx->bType = bp.blockType; - dctx->rleSize = bp.origSize; - if (cBlockSize) { - dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; - return 0; - } - /* empty block */ - if (bp.lastBlock) { - if (dctx->fParams.checksumFlag) { - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* end of frame */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->expected = 3; /* go directly to next header */ - dctx->stage = ZSTDds_decodeBlockHeader; - } - return 0; - } - case ZSTDds_decompressLastBlock: - case ZSTDds_decompressBlock: { - size_t rSize; - switch (dctx->bType) { - case bt_compressed: rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize); break; - case bt_raw: rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); break; - case bt_rle: rSize = ZSTD_setRleBlock(dst, dstCapacity, src, srcSize, dctx->rleSize); break; - case bt_reserved: /* should never happen */ - default: return ERROR(corruption_detected); - } - if (ZSTD_isError(rSize)) - return rSize; - if (dctx->fParams.checksumFlag) - xxh64_update(&dctx->xxhState, dst, rSize); - - if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ - if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ - dctx->expected = 4; - dctx->stage = ZSTDds_checkChecksum; - } else { - dctx->expected = 0; /* ends here */ - dctx->stage = ZSTDds_getFrameHeaderSize; - } - } else { - dctx->stage = ZSTDds_decodeBlockHeader; - dctx->expected = ZSTD_blockHeaderSize; - dctx->previousDstEnd = (char *)dst + rSize; - } - return rSize; - } - case ZSTDds_checkChecksum: { - U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); - U32 const check32 = ZSTD_readLE32(src); /* srcSize == 4, guaranteed by dctx->expected */ - if (check32 != h32) - return ERROR(checksum_wrong); - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - case ZSTDds_decodeSkippableHeader: { - memcpy(dctx->headerBuffer + ZSTD_frameHeaderSize_prefix, src, dctx->expected); - dctx->expected = ZSTD_readLE32(dctx->headerBuffer + 4); - dctx->stage = ZSTDds_skipFrame; - return 0; - } - case ZSTDds_skipFrame: { - dctx->expected = 0; - dctx->stage = ZSTDds_getFrameHeaderSize; - return 0; - } - default: - return ERROR(GENERIC); /* impossible */ - } -} - -static size_t ZSTD_refDictContent(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - dctx->dictEnd = dctx->previousDstEnd; - dctx->vBase = (const char *)dict - ((const char *)(dctx->previousDstEnd) - (const char *)(dctx->base)); - dctx->base = dict; - dctx->previousDstEnd = (const char *)dict + dictSize; - return 0; -} - -/* ZSTD_loadEntropy() : - * dict : must point at beginning of a valid zstd dictionary - * @return : size of entropy tables read */ -static size_t ZSTD_loadEntropy(ZSTD_entropyTables_t *entropy, const void *const dict, size_t const dictSize) -{ - const BYTE *dictPtr = (const BYTE *)dict; - const BYTE *const dictEnd = dictPtr + dictSize; - - if (dictSize <= 8) - return ERROR(dictionary_corrupted); - dictPtr += 8; /* skip header = magic + dictID */ - - { - size_t const hSize = HUF_readDTableX4_wksp(entropy->hufTable, dictPtr, dictEnd - dictPtr, entropy->workspace, sizeof(entropy->workspace)); - if (HUF_isError(hSize)) - return ERROR(dictionary_corrupted); - dictPtr += hSize; - } - - { - short offcodeNCount[MaxOff + 1]; - U32 offcodeMaxValue = MaxOff, offcodeLog; - size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(offcodeHeaderSize)) - return ERROR(dictionary_corrupted); - if (offcodeLog > OffFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->OFTable, offcodeNCount, offcodeMaxValue, offcodeLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += offcodeHeaderSize; - } - - { - short matchlengthNCount[MaxML + 1]; - unsigned matchlengthMaxValue = MaxML, matchlengthLog; - size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(matchlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (matchlengthLog > MLFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->MLTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += matchlengthHeaderSize; - } - - { - short litlengthNCount[MaxLL + 1]; - unsigned litlengthMaxValue = MaxLL, litlengthLog; - size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd - dictPtr); - if (FSE_isError(litlengthHeaderSize)) - return ERROR(dictionary_corrupted); - if (litlengthLog > LLFSELog) - return ERROR(dictionary_corrupted); - CHECK_E(FSE_buildDTable_wksp(entropy->LLTable, litlengthNCount, litlengthMaxValue, litlengthLog, entropy->workspace, sizeof(entropy->workspace)), dictionary_corrupted); - dictPtr += litlengthHeaderSize; - } - - if (dictPtr + 12 > dictEnd) - return ERROR(dictionary_corrupted); - { - int i; - size_t const dictContentSize = (size_t)(dictEnd - (dictPtr + 12)); - for (i = 0; i < 3; i++) { - U32 const rep = ZSTD_readLE32(dictPtr); - dictPtr += 4; - if (rep == 0 || rep >= dictContentSize) - return ERROR(dictionary_corrupted); - entropy->rep[i] = rep; - } - } - - return dictPtr - (const BYTE *)dict; -} - -static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - if (dictSize < 8) - return ZSTD_refDictContent(dctx, dict, dictSize); - { - U32 const magic = ZSTD_readLE32(dict); - if (magic != ZSTD_DICT_MAGIC) { - return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ - } - } - dctx->dictID = ZSTD_readLE32((const char *)dict + 4); - - /* load entropy tables */ - { - size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize); - if (ZSTD_isError(eSize)) - return ERROR(dictionary_corrupted); - dict = (const char *)dict + eSize; - dictSize -= eSize; - } - dctx->litEntropy = dctx->fseEntropy = 1; - - /* reference dictionary content */ - return ZSTD_refDictContent(dctx, dict, dictSize); -} - -size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx *dctx, const void *dict, size_t dictSize) -{ - CHECK_F(ZSTD_decompressBegin(dctx)); - if (dict && dictSize) - CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); - return 0; -} - -/* ====== ZSTD_DDict ====== */ - -struct ZSTD_DDict_s { - void *dictBuffer; - const void *dictContent; - size_t dictSize; - ZSTD_entropyTables_t entropy; - U32 dictID; - U32 entropyPresent; - ZSTD_customMem cMem; -}; /* typedef'd to ZSTD_DDict within "zstd.h" */ - -size_t ZSTD_DDictWorkspaceBound(void) { return ZSTD_ALIGN(sizeof(ZSTD_stack)) + ZSTD_ALIGN(sizeof(ZSTD_DDict)); } - -static const void *ZSTD_DDictDictContent(const ZSTD_DDict *ddict) { return ddict->dictContent; } - -static size_t ZSTD_DDictDictSize(const ZSTD_DDict *ddict) { return ddict->dictSize; } - -static void ZSTD_refDDict(ZSTD_DCtx *dstDCtx, const ZSTD_DDict *ddict) -{ - ZSTD_decompressBegin(dstDCtx); /* init */ - if (ddict) { /* support refDDict on NULL */ - dstDCtx->dictID = ddict->dictID; - dstDCtx->base = ddict->dictContent; - dstDCtx->vBase = ddict->dictContent; - dstDCtx->dictEnd = (const BYTE *)ddict->dictContent + ddict->dictSize; - dstDCtx->previousDstEnd = dstDCtx->dictEnd; - if (ddict->entropyPresent) { - dstDCtx->litEntropy = 1; - dstDCtx->fseEntropy = 1; - dstDCtx->LLTptr = ddict->entropy.LLTable; - dstDCtx->MLTptr = ddict->entropy.MLTable; - dstDCtx->OFTptr = ddict->entropy.OFTable; - dstDCtx->HUFptr = ddict->entropy.hufTable; - dstDCtx->entropy.rep[0] = ddict->entropy.rep[0]; - dstDCtx->entropy.rep[1] = ddict->entropy.rep[1]; - dstDCtx->entropy.rep[2] = ddict->entropy.rep[2]; - } else { - dstDCtx->litEntropy = 0; - dstDCtx->fseEntropy = 0; - } - } -} - -static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict *ddict) -{ - ddict->dictID = 0; - ddict->entropyPresent = 0; - if (ddict->dictSize < 8) - return 0; - { - U32 const magic = ZSTD_readLE32(ddict->dictContent); - if (magic != ZSTD_DICT_MAGIC) - return 0; /* pure content mode */ - } - ddict->dictID = ZSTD_readLE32((const char *)ddict->dictContent + 4); - - /* load entropy tables */ - CHECK_E(ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted); - ddict->entropyPresent = 1; - return 0; -} - -static ZSTD_DDict *ZSTD_createDDict_advanced(const void *dict, size_t dictSize, unsigned byReference, ZSTD_customMem customMem) -{ - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - { - ZSTD_DDict *const ddict = (ZSTD_DDict *)ZSTD_malloc(sizeof(ZSTD_DDict), customMem); - if (!ddict) - return NULL; - ddict->cMem = customMem; - - if ((byReference) || (!dict) || (!dictSize)) { - ddict->dictBuffer = NULL; - ddict->dictContent = dict; - } else { - void *const internalBuffer = ZSTD_malloc(dictSize, customMem); - if (!internalBuffer) { - ZSTD_freeDDict(ddict); - return NULL; - } - memcpy(internalBuffer, dict, dictSize); - ddict->dictBuffer = internalBuffer; - ddict->dictContent = internalBuffer; - } - ddict->dictSize = dictSize; - ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ - /* parse dictionary content */ - { - size_t const errorCode = ZSTD_loadEntropy_inDDict(ddict); - if (ZSTD_isError(errorCode)) { - ZSTD_freeDDict(ddict); - return NULL; - } - } - - return ddict; - } -} - -/*! ZSTD_initDDict() : -* Create a digested dictionary, to start decompression without startup delay. -* `dict` content is copied inside DDict. -* Consequently, `dict` can be released after `ZSTD_DDict` creation */ -ZSTD_DDict *ZSTD_initDDict(const void *dict, size_t dictSize, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - return ZSTD_createDDict_advanced(dict, dictSize, 1, stackMem); -} - -size_t ZSTD_freeDDict(ZSTD_DDict *ddict) -{ - if (ddict == NULL) - return 0; /* support free on NULL */ - { - ZSTD_customMem const cMem = ddict->cMem; - ZSTD_free(ddict->dictBuffer, cMem); - ZSTD_free(ddict, cMem); - return 0; - } -} - -/*! ZSTD_getDictID_fromDict() : - * Provides the dictID stored within dictionary. - * if @return == 0, the dictionary is not conformant with Zstandard specification. - * It can still be loaded, but as a content-only dictionary. */ -unsigned ZSTD_getDictID_fromDict(const void *dict, size_t dictSize) -{ - if (dictSize < 8) - return 0; - if (ZSTD_readLE32(dict) != ZSTD_DICT_MAGIC) - return 0; - return ZSTD_readLE32((const char *)dict + 4); -} - -/*! ZSTD_getDictID_fromDDict() : - * Provides the dictID of the dictionary loaded into `ddict`. - * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. - * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ -unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict *ddict) -{ - if (ddict == NULL) - return 0; - return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); -} - -/*! ZSTD_getDictID_fromFrame() : - * Provides the dictID required to decompressed the frame stored within `src`. - * If @return == 0, the dictID could not be decoded. - * This could for one of the following reasons : - * - The frame does not require a dictionary to be decoded (most common case). - * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. - * Note : this use case also happens when using a non-conformant dictionary. - * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). - * - This is not a Zstandard frame. - * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ -unsigned ZSTD_getDictID_fromFrame(const void *src, size_t srcSize) -{ - ZSTD_frameParams zfp = {0, 0, 0, 0}; - size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); - if (ZSTD_isError(hError)) - return 0; - return zfp.dictID; -} - -/*! ZSTD_decompress_usingDDict() : -* Decompression using a pre-digested Dictionary -* Use dictionary without significant overhead. */ -size_t ZSTD_decompress_usingDDict(ZSTD_DCtx *dctx, void *dst, size_t dstCapacity, const void *src, size_t srcSize, const ZSTD_DDict *ddict) -{ - /* pass content and size in case legacy frames are encountered */ - return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, NULL, 0, ddict); -} - -/*===================================== -* Streaming decompression -*====================================*/ - -typedef enum { zdss_init, zdss_loadHeader, zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; - -/* *** Resource management *** */ -struct ZSTD_DStream_s { - ZSTD_DCtx *dctx; - ZSTD_DDict *ddictLocal; - const ZSTD_DDict *ddict; - ZSTD_frameParams fParams; - ZSTD_dStreamStage stage; - char *inBuff; - size_t inBuffSize; - size_t inPos; - size_t maxWindowSize; - char *outBuff; - size_t outBuffSize; - size_t outStart; - size_t outEnd; - size_t blockSize; - BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; /* tmp buffer to store frame header */ - size_t lhSize; - ZSTD_customMem customMem; - void *legacyContext; - U32 previousLegacyVersion; - U32 legacyVersion; - U32 hostageByte; -}; /* typedef'd to ZSTD_DStream within "zstd.h" */ - -size_t ZSTD_DStreamWorkspaceBound(size_t maxWindowSize) -{ - size_t const blockSize = MIN(maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const inBuffSize = blockSize; - size_t const outBuffSize = maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - return ZSTD_DCtxWorkspaceBound() + ZSTD_ALIGN(sizeof(ZSTD_DStream)) + ZSTD_ALIGN(inBuffSize) + ZSTD_ALIGN(outBuffSize); -} - -static ZSTD_DStream *ZSTD_createDStream_advanced(ZSTD_customMem customMem) -{ - ZSTD_DStream *zds; - - if (!customMem.customAlloc || !customMem.customFree) - return NULL; - - zds = (ZSTD_DStream *)ZSTD_malloc(sizeof(ZSTD_DStream), customMem); - if (zds == NULL) - return NULL; - memset(zds, 0, sizeof(ZSTD_DStream)); - memcpy(&zds->customMem, &customMem, sizeof(ZSTD_customMem)); - zds->dctx = ZSTD_createDCtx_advanced(customMem); - if (zds->dctx == NULL) { - ZSTD_freeDStream(zds); - return NULL; - } - zds->stage = zdss_init; - zds->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; - return zds; -} - -ZSTD_DStream *ZSTD_initDStream(size_t maxWindowSize, void *workspace, size_t workspaceSize) -{ - ZSTD_customMem const stackMem = ZSTD_initStack(workspace, workspaceSize); - ZSTD_DStream *zds = ZSTD_createDStream_advanced(stackMem); - if (!zds) { - return NULL; - } - - zds->maxWindowSize = maxWindowSize; - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - ZSTD_freeDDict(zds->ddictLocal); - zds->ddictLocal = NULL; - zds->ddict = zds->ddictLocal; - zds->legacyVersion = 0; - zds->hostageByte = 0; - - { - size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - - zds->inBuff = (char *)ZSTD_malloc(blockSize, zds->customMem); - zds->inBuffSize = blockSize; - zds->outBuff = (char *)ZSTD_malloc(neededOutSize, zds->customMem); - zds->outBuffSize = neededOutSize; - if (zds->inBuff == NULL || zds->outBuff == NULL) { - ZSTD_freeDStream(zds); - return NULL; - } - } - return zds; -} - -ZSTD_DStream *ZSTD_initDStream_usingDDict(size_t maxWindowSize, const ZSTD_DDict *ddict, void *workspace, size_t workspaceSize) -{ - ZSTD_DStream *zds = ZSTD_initDStream(maxWindowSize, workspace, workspaceSize); - if (zds) { - zds->ddict = ddict; - } - return zds; -} - -size_t ZSTD_freeDStream(ZSTD_DStream *zds) -{ - if (zds == NULL) - return 0; /* support free on null */ - { - ZSTD_customMem const cMem = zds->customMem; - ZSTD_freeDCtx(zds->dctx); - zds->dctx = NULL; - ZSTD_freeDDict(zds->ddictLocal); - zds->ddictLocal = NULL; - ZSTD_free(zds->inBuff, cMem); - zds->inBuff = NULL; - ZSTD_free(zds->outBuff, cMem); - zds->outBuff = NULL; - ZSTD_free(zds, cMem); - return 0; - } -} - -/* *** Initialization *** */ - -size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize; } -size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; } - -size_t ZSTD_resetDStream(ZSTD_DStream *zds) -{ - zds->stage = zdss_loadHeader; - zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; - zds->legacyVersion = 0; - zds->hostageByte = 0; - return ZSTD_frameHeaderSize_prefix; -} - -/* ***** Decompression ***** */ - -ZSTD_STATIC size_t ZSTD_limitCopy(void *dst, size_t dstCapacity, const void *src, size_t srcSize) -{ - size_t const length = MIN(dstCapacity, srcSize); - memcpy(dst, src, length); - return length; -} - -size_t ZSTD_decompressStream(ZSTD_DStream *zds, ZSTD_outBuffer *output, ZSTD_inBuffer *input) -{ - const char *const istart = (const char *)(input->src) + input->pos; - const char *const iend = (const char *)(input->src) + input->size; - const char *ip = istart; - char *const ostart = (char *)(output->dst) + output->pos; - char *const oend = (char *)(output->dst) + output->size; - char *op = ostart; - U32 someMoreWork = 1; - - while (someMoreWork) { - switch (zds->stage) { - case zdss_init: - ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ - fallthrough; - - case zdss_loadHeader: { - size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); - if (ZSTD_isError(hSize)) - return hSize; - if (hSize != 0) { /* need more input */ - size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ - if (toLoad > (size_t)(iend - ip)) { /* not enough input to load full header */ - memcpy(zds->headerBuffer + zds->lhSize, ip, iend - ip); - zds->lhSize += iend - ip; - input->pos = input->size; - return (MAX(ZSTD_frameHeaderSize_min, hSize) - zds->lhSize) + - ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ - } - memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); - zds->lhSize = hSize; - ip += toLoad; - break; - } - - /* check for single-pass mode opportunity */ - if (zds->fParams.frameContentSize && zds->fParams.windowSize /* skippable frame if == 0 */ - && (U64)(size_t)(oend - op) >= zds->fParams.frameContentSize) { - size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend - istart); - if (cSize <= (size_t)(iend - istart)) { - size_t const decompressedSize = ZSTD_decompress_usingDDict(zds->dctx, op, oend - op, istart, cSize, zds->ddict); - if (ZSTD_isError(decompressedSize)) - return decompressedSize; - ip = istart + cSize; - op += decompressedSize; - zds->dctx->expected = 0; - zds->stage = zdss_init; - someMoreWork = 0; - break; - } - } - - /* Consume header */ - ZSTD_refDDict(zds->dctx, zds->ddict); - { - size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); /* == ZSTD_frameHeaderSize_prefix */ - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer, h1Size)); - { - size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zds->dctx); - CHECK_F(ZSTD_decompressContinue(zds->dctx, NULL, 0, zds->headerBuffer + h1Size, h2Size)); - } - } - - zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); - if (zds->fParams.windowSize > zds->maxWindowSize) - return ERROR(frameParameter_windowTooLarge); - - /* Buffers are preallocated, but double check */ - { - size_t const blockSize = MIN(zds->maxWindowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX); - size_t const neededOutSize = zds->maxWindowSize + blockSize + WILDCOPY_OVERLENGTH * 2; - if (zds->inBuffSize < blockSize) { - return ERROR(GENERIC); - } - if (zds->outBuffSize < neededOutSize) { - return ERROR(GENERIC); - } - zds->blockSize = blockSize; - } - zds->stage = zdss_read; - } - fallthrough; - - case zdss_read: { - size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); - if (neededInSize == 0) { /* end of frame */ - zds->stage = zdss_init; - someMoreWork = 0; - break; - } - if ((size_t)(iend - ip) >= neededInSize) { /* decode directly from src */ - const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, - (isSkipFrame ? 0 : zds->outBuffSize - zds->outStart), ip, neededInSize); - if (ZSTD_isError(decodedSize)) - return decodedSize; - ip += neededInSize; - if (!decodedSize && !isSkipFrame) - break; /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; - break; - } - if (ip == iend) { - someMoreWork = 0; - break; - } /* no more input */ - zds->stage = zdss_load; - /* pass-through */ - } - fallthrough; - - case zdss_load: { - size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds->dctx); - size_t const toLoad = neededInSize - zds->inPos; /* should always be <= remaining space within inBuff */ - size_t loadedSize; - if (toLoad > zds->inBuffSize - zds->inPos) - return ERROR(corruption_detected); /* should never happen */ - loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend - ip); - ip += loadedSize; - zds->inPos += loadedSize; - if (loadedSize < toLoad) { - someMoreWork = 0; - break; - } /* not enough input, wait for more */ - - /* decode loaded input */ - { - const int isSkipFrame = ZSTD_isSkipFrame(zds->dctx); - size_t const decodedSize = ZSTD_decompressContinue(zds->dctx, zds->outBuff + zds->outStart, zds->outBuffSize - zds->outStart, - zds->inBuff, neededInSize); - if (ZSTD_isError(decodedSize)) - return decodedSize; - zds->inPos = 0; /* input is consumed */ - if (!decodedSize && !isSkipFrame) { - zds->stage = zdss_read; - break; - } /* this was just a header */ - zds->outEnd = zds->outStart + decodedSize; - zds->stage = zdss_flush; - /* pass-through */ - } - } - fallthrough; - - case zdss_flush: { - size_t const toFlushSize = zds->outEnd - zds->outStart; - size_t const flushedSize = ZSTD_limitCopy(op, oend - op, zds->outBuff + zds->outStart, toFlushSize); - op += flushedSize; - zds->outStart += flushedSize; - if (flushedSize == toFlushSize) { /* flush completed */ - zds->stage = zdss_read; - if (zds->outStart + zds->blockSize > zds->outBuffSize) - zds->outStart = zds->outEnd = 0; - break; - } - /* cannot complete flush */ - someMoreWork = 0; - break; - } - default: - return ERROR(GENERIC); /* impossible */ - } - } - - /* result */ - input->pos += (size_t)(ip - istart); - output->pos += (size_t)(op - ostart); - { - size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds->dctx); - if (!nextSrcSizeHint) { /* frame fully decoded */ - if (zds->outEnd == zds->outStart) { /* output fully flushed */ - if (zds->hostageByte) { - if (input->pos >= input->size) { - zds->stage = zdss_read; - return 1; - } /* can't release hostage (not present) */ - input->pos++; /* release hostage */ - } - return 0; - } - if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ - input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ - zds->hostageByte = 1; - } - return 1; - } - nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds->dctx) == ZSTDnit_block); /* preload header of next block */ - if (zds->inPos > nextSrcSizeHint) - return ERROR(GENERIC); /* should never happen */ - nextSrcSizeHint -= zds->inPos; /* already loaded*/ - return nextSrcSizeHint; - } -} - -EXPORT_SYMBOL(ZSTD_DCtxWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDCtx); -EXPORT_SYMBOL(ZSTD_decompressDCtx); -EXPORT_SYMBOL(ZSTD_decompress_usingDict); - -EXPORT_SYMBOL(ZSTD_DDictWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDDict); -EXPORT_SYMBOL(ZSTD_decompress_usingDDict); - -EXPORT_SYMBOL(ZSTD_DStreamWorkspaceBound); -EXPORT_SYMBOL(ZSTD_initDStream); -EXPORT_SYMBOL(ZSTD_initDStream_usingDDict); -EXPORT_SYMBOL(ZSTD_resetDStream); -EXPORT_SYMBOL(ZSTD_decompressStream); -EXPORT_SYMBOL(ZSTD_DStreamInSize); -EXPORT_SYMBOL(ZSTD_DStreamOutSize); - -EXPORT_SYMBOL(ZSTD_findFrameCompressedSize); -EXPORT_SYMBOL(ZSTD_getFrameContentSize); -EXPORT_SYMBOL(ZSTD_findDecompressedSize); - -EXPORT_SYMBOL(ZSTD_isFrame); -EXPORT_SYMBOL(ZSTD_getDictID_fromDict); -EXPORT_SYMBOL(ZSTD_getDictID_fromDDict); -EXPORT_SYMBOL(ZSTD_getDictID_fromFrame); - -EXPORT_SYMBOL(ZSTD_getFrameParams); -EXPORT_SYMBOL(ZSTD_decompressBegin); -EXPORT_SYMBOL(ZSTD_decompressBegin_usingDict); -EXPORT_SYMBOL(ZSTD_copyDCtx); -EXPORT_SYMBOL(ZSTD_nextSrcSizeToDecompress); -EXPORT_SYMBOL(ZSTD_decompressContinue); -EXPORT_SYMBOL(ZSTD_nextInputType); - -EXPORT_SYMBOL(ZSTD_decompressBlock); -EXPORT_SYMBOL(ZSTD_insertBlock); - -MODULE_LICENSE("Dual BSD/GPL"); -MODULE_DESCRIPTION("Zstd Decompressor"); diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c new file mode 100644 index 000000000000..c48d57e3fffc --- /dev/null +++ b/lib/zstd/decompress/huf_decompress.c @@ -0,0 +1,1206 @@ +/* ****************************************************************** + * huff0 huffman decoder, + * part of Finite State Entropy library + * Copyright (c) Yann Collet, Facebook, Inc. + * + * You can contact the author at : + * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. +****************************************************************** */ + +/* ************************************************************** +* Dependencies +****************************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */ +#include "../common/compiler.h" +#include "../common/bitstream.h" /* BIT_* */ +#include "../common/fse.h" /* to compress headers */ +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/error_private.h" + +/* ************************************************************** +* Macros +****************************************************************/ + +/* These two optional macros force the use one way or another of the two + * Huffman decompression implementations. You can't force in both directions + * at the same time. + */ +#if defined(HUF_FORCE_DECOMPRESS_X1) && \ + defined(HUF_FORCE_DECOMPRESS_X2) +#error "Cannot force the use of the X1 and X2 decoders at the same time!" +#endif + + +/* ************************************************************** +* Error Management +****************************************************************/ +#define HUF_isError ERR_isError + + +/* ************************************************************** +* Byte alignment for workSpace management +****************************************************************/ +#define HUF_ALIGN(x, a) HUF_ALIGN_MASK((x), (a) - 1) +#define HUF_ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) + + +/* ************************************************************** +* BMI2 Variant Wrappers +****************************************************************/ +#if DYNAMIC_BMI2 + +#define HUF_DGEN(fn) \ + \ + static size_t fn##_default( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2( \ + void* dst, size_t dstSize, \ + const void* cSrc, size_t cSrcSize, \ + const HUF_DTable* DTable) \ + { \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + if (bmi2) { \ + return fn##_bmi2(dst, dstSize, cSrc, cSrcSize, DTable); \ + } \ + return fn##_default(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#else + +#define HUF_DGEN(fn) \ + static size_t fn(void* dst, size_t dstSize, void const* cSrc, \ + size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \ + { \ + (void)bmi2; \ + return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable); \ + } + +#endif + + +/*-***************************/ +/* generic DTableDesc */ +/*-***************************/ +typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc; + +static DTableDesc HUF_getDTableDesc(const HUF_DTable* table) +{ + DTableDesc dtd; + ZSTD_memcpy(&dtd, table, sizeof(dtd)); + return dtd; +} + + +#ifndef HUF_FORCE_DECOMPRESS_X2 + +/*-***************************/ +/* single-symbol decoding */ +/*-***************************/ +typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */ + +/** + * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at + * a time. + */ +static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) { + U64 D4; + if (MEM_isLittleEndian()) { + D4 = symbol + (nbBits << 8); + } else { + D4 = (symbol << 8) + nbBits; + } + D4 *= 0x0001000100010001ULL; + return D4; +} + +typedef struct { + U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1]; + U32 statsWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; + BYTE symbols[HUF_SYMBOLVALUE_MAX + 1]; + BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; +} HUF_ReadDTableX1_Workspace; + + +size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize) +{ + return HUF_readDTableX1_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + +size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + U32 tableLog = 0; + U32 nbSymbols = 0; + size_t iSize; + void* const dtPtr = DTable + 1; + HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr; + HUF_ReadDTableX1_Workspace* wksp = (HUF_ReadDTableX1_Workspace*)workSpace; + + DEBUG_STATIC_ASSERT(HUF_DECOMPRESS_WORKSPACE_SIZE >= sizeof(*wksp)); + if (sizeof(*wksp) > wkspSize) return ERROR(tableLog_tooLarge); + + DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); + /* ZSTD_memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2); + if (HUF_isError(iSize)) return iSize; + + /* Table header */ + { DTableDesc dtd = HUF_getDTableDesc(DTable); + if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ + dtd.tableType = 0; + dtd.tableLog = (BYTE)tableLog; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + } + + /* Compute symbols and rankStart given rankVal: + * + * rankVal already contains the number of values of each weight. + * + * symbols contains the symbols ordered by weight. First are the rankVal[0] + * weight 0 symbols, followed by the rankVal[1] weight 1 symbols, and so on. + * symbols[0] is filled (but unused) to avoid a branch. + * + * rankStart contains the offset where each rank belongs in the DTable. + * rankStart[0] is not filled because there are no entries in the table for + * weight 0. + */ + { + int n; + int nextRankStart = 0; + int const unroll = 4; + int const nLimit = (int)nbSymbols - unroll + 1; + for (n=0; n<(int)tableLog+1; n++) { + U32 const curr = nextRankStart; + nextRankStart += wksp->rankVal[n]; + wksp->rankStart[n] = curr; + } + for (n=0; n < nLimit; n += unroll) { + int u; + for (u=0; u < unroll; ++u) { + size_t const w = wksp->huffWeight[n+u]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)(n+u); + } + } + for (; n < (int)nbSymbols; ++n) { + size_t const w = wksp->huffWeight[n]; + wksp->symbols[wksp->rankStart[w]++] = (BYTE)n; + } + } + + /* fill DTable + * We fill all entries of each weight in order. + * That way length is a constant for each iteration of the outter loop. + * We can switch based on the length to a different inner loop which is + * optimized for that particular case. + */ + { + U32 w; + int symbol=wksp->rankVal[0]; + int rankStart=0; + for (w=1; wrankVal[w]; + int const length = (1 << w) >> 1; + int uStart = rankStart; + BYTE const nbBits = (BYTE)(tableLog + 1 - w); + int s; + int u; + switch (length) { + case 1: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart] = D; + uStart += 1; + } + break; + case 2: + for (s=0; ssymbols[symbol + s]; + D.nbBits = nbBits; + dt[uStart+0] = D; + dt[uStart+1] = D; + uStart += 2; + } + break; + case 4: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + uStart += 4; + } + break; + case 8: + for (s=0; ssymbols[symbol + s], nbBits); + MEM_write64(dt + uStart, D4); + MEM_write64(dt + uStart + 4, D4); + uStart += 8; + } + break; + default: + for (s=0; ssymbols[symbol + s], nbBits); + for (u=0; u < length; u += 16) { + MEM_write64(dt + uStart + u + 0, D4); + MEM_write64(dt + uStart + u + 4, D4); + MEM_write64(dt + uStart + u + 8, D4); + MEM_write64(dt + uStart + u + 12, D4); + } + assert(u == length); + uStart += length; + } + break; + } + symbol += symbolCount; + rankStart += symbolCount * length; + } + } + return iSize; +} + +FORCE_INLINE_TEMPLATE BYTE +HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ + BYTE const c = dt[val].byte; + BIT_skipBits(Dstream, dt[val].nbBits); + return c; +} + +#define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \ + *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) + +HINT_INLINE size_t +HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 4 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) { + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_1(p, bitDPtr); + HUF_DECODE_SYMBOLX1_2(p, bitDPtr); + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + } + + /* [0-3] symbols remaining */ + if (MEM_32bits()) + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd)) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + /* no more data to retrieve from bitstream, no need to reload */ + while (p < pEnd) + HUF_DECODE_SYMBOLX1_0(p, bitDPtr); + + return pEnd-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BYTE* op = (BYTE*)dst; + BYTE* const oend = op + dstSize; + const void* dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + BIT_DStream_t bitD; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog); + + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X1_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + /* Check */ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - 3; + const void* const dtPtr = DTable + 1; + const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + const size_t segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + U32 endSignal = 1; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */ + for ( ; (endSignal) & (op4 < olimit) ; ) { + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_1(op1, &bitD1); + HUF_DECODE_SYMBOLX1_1(op2, &bitD2); + HUF_DECODE_SYMBOLX1_1(op3, &bitD3); + HUF_DECODE_SYMBOLX1_1(op4, &bitD4); + HUF_DECODE_SYMBOLX1_2(op1, &bitD1); + HUF_DECODE_SYMBOLX1_2(op2, &bitD2); + HUF_DECODE_SYMBOLX1_2(op3, &bitD3); + HUF_DECODE_SYMBOLX1_2(op4, &bitD4); + HUF_DECODE_SYMBOLX1_0(op1, &bitD1); + HUF_DECODE_SYMBOLX1_0(op2, &bitD2); + HUF_DECODE_SYMBOLX1_0(op3, &bitD3); + HUF_DECODE_SYMBOLX1_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; + } + + /* check corruption */ + /* note : should not be necessary : op# advance in lock step, and we control op4. + * but curiously, binary generated by gcc 7.2 & 7.3 with -mbmi2 runs faster when >=1 test is present */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 supposed already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + + +typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize, + const void *cSrc, + size_t cSrcSize, + const HUF_DTable *DTable); + +HUF_DGEN(HUF_decompress1X1_usingDTable_internal) +HUF_DGEN(HUF_decompress4X1_usingDTable_internal) + + + +size_t HUF_decompress1X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X1_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 0) return ERROR(GENERIC); + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X2 */ + + +#ifndef HUF_FORCE_DECOMPRESS_X1 + +/* *************************/ +/* double-symbols decoding */ +/* *************************/ + +typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */ +typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; +typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; +typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX]; + + +/* HUF_fillDTableX2Level2() : + * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ +static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed, + const U32* rankValOrigin, const int minWeight, + const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, + U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize) +{ + HUF_DEltX2 DElt; + U32* rankVal = wksp; + + assert(wkspSize >= HUF_TABLELOG_MAX + 1); + (void)wkspSize; + /* get pre-calculated rankVal */ + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); + + /* fill skipped values */ + if (minWeight>1) { + U32 i, skipSize = rankVal[minWeight]; + MEM_writeLE16(&(DElt.sequence), baseSeq); + DElt.nbBits = (BYTE)(consumed); + DElt.length = 1; + for (i = 0; i < skipSize; i++) + DTable[i] = DElt; + } + + /* fill DTable */ + { U32 s; for (s=0; s= 1 */ + + rankVal[weight] += length; + } } +} + + +static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog, + const sortedSymbol_t* sortedList, const U32 sortedListSize, + const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, + const U32 nbBitsBaseline, U32* wksp, size_t wkspSize) +{ + U32* rankVal = wksp; + const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ + const U32 minBits = nbBitsBaseline - maxWeight; + U32 s; + + assert(wkspSize >= HUF_TABLELOG_MAX + 1); + wksp += HUF_TABLELOG_MAX + 1; + wkspSize -= HUF_TABLELOG_MAX + 1; + + ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1)); + + /* fill DTable */ + for (s=0; s= minBits) { /* enough room for a second symbol */ + U32 sortedRank; + int minWeight = nbBits + scaleLog; + if (minWeight < 1) minWeight = 1; + sortedRank = rankStart[minWeight]; + HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits, + rankValOrigin[nbBits], minWeight, + sortedList+sortedRank, sortedListSize-sortedRank, + nbBitsBaseline, symbol, wksp, wkspSize); + } else { + HUF_DEltX2 DElt; + MEM_writeLE16(&(DElt.sequence), symbol); + DElt.nbBits = (BYTE)(nbBits); + DElt.length = 1; + { U32 const end = start + length; + U32 u; + for (u = start; u < end; u++) DTable[u] = DElt; + } } + rankVal[weight] += length; + } +} + +typedef struct { + rankValCol_t rankVal[HUF_TABLELOG_MAX]; + U32 rankStats[HUF_TABLELOG_MAX + 1]; + U32 rankStart0[HUF_TABLELOG_MAX + 2]; + sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; + BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; + U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32]; +} HUF_ReadDTableX2_Workspace; + +size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, + const void* src, size_t srcSize, + void* workSpace, size_t wkspSize) +{ + U32 tableLog, maxW, sizeOfSort, nbSymbols; + DTableDesc dtd = HUF_getDTableDesc(DTable); + U32 const maxTableLog = dtd.maxTableLog; + size_t iSize; + void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */ + HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr; + U32 *rankStart; + + HUF_ReadDTableX2_Workspace* const wksp = (HUF_ReadDTableX2_Workspace*)workSpace; + + if (sizeof(*wksp) > wkspSize) return ERROR(GENERIC); + + rankStart = wksp->rankStart0 + 1; + ZSTD_memset(wksp->rankStats, 0, sizeof(wksp->rankStats)); + ZSTD_memset(wksp->rankStart0, 0, sizeof(wksp->rankStart0)); + + DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ + if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); + /* ZSTD_memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ + + iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0); + if (HUF_isError(iSize)) return iSize; + + /* check result */ + if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ + + /* find maxWeight */ + for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */ + + /* Get start index of each weight */ + { U32 w, nextRankStart = 0; + for (w=1; wrankStats[w]; + rankStart[w] = curr; + } + rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ + sizeOfSort = nextRankStart; + } + + /* sort symbols by weight */ + { U32 s; + for (s=0; sweightList[s]; + U32 const r = rankStart[w]++; + wksp->sortedSymbol[r].symbol = (BYTE)s; + wksp->sortedSymbol[r].weight = (BYTE)w; + } + rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ + } + + /* Build rankVal */ + { U32* const rankVal0 = wksp->rankVal[0]; + { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */ + U32 nextRankVal = 0; + U32 w; + for (w=1; wrankStats[w] << (w+rescale); + rankVal0[w] = curr; + } } + { U32 const minBits = tableLog+1 - maxW; + U32 consumed; + for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { + U32* const rankValPtr = wksp->rankVal[consumed]; + U32 w; + for (w = 1; w < maxW+1; w++) { + rankValPtr[w] = rankVal0[w] >> consumed; + } } } } + + HUF_fillDTableX2(dt, maxTableLog, + wksp->sortedSymbol, sizeOfSort, + wksp->rankStart0, wksp->rankVal, maxW, + tableLog+1, + wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32)); + + dtd.tableLog = (BYTE)maxTableLog; + dtd.tableType = 1; + ZSTD_memcpy(DTable, &dtd, sizeof(dtd)); + return iSize; +} + + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 2); + BIT_skipBits(DStream, dt[val].nbBits); + return dt[val].length; +} + +FORCE_INLINE_TEMPLATE U32 +HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog) +{ + size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ + ZSTD_memcpy(op, dt+val, 1); + if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits); + else { + if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) { + BIT_skipBits(DStream, dt[val].nbBits); + if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8)) + /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ + DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); + } } + return 1; +} + +#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ + if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ + if (MEM_64bits()) \ + ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog) + +HINT_INLINE size_t +HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, + const HUF_DEltX2* const dt, const U32 dtLog) +{ + BYTE* const pStart = p; + + /* up to 8 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) { + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_1(p, bitDPtr); + HUF_DECODE_SYMBOLX2_2(p, bitDPtr); + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + } + + /* closer to end : up to 2 symbols at a time */ + while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2)) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); + + while (p <= pEnd-2) + HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ + + if (p < pEnd) + p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog); + + return p-pStart; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress1X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + BIT_DStream_t bitD; + + /* Init */ + CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) ); + + /* decode */ + { BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */ + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog); + } + + /* check */ + if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected); + + /* decoded size */ + return dstSize; +} + +FORCE_INLINE_TEMPLATE size_t +HUF_decompress4X2_usingDTable_internal_body( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ + + { const BYTE* const istart = (const BYTE*) cSrc; + BYTE* const ostart = (BYTE*) dst; + BYTE* const oend = ostart + dstSize; + BYTE* const olimit = oend - (sizeof(size_t)-1); + const void* const dtPtr = DTable+1; + const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr; + + /* Init */ + BIT_DStream_t bitD1; + BIT_DStream_t bitD2; + BIT_DStream_t bitD3; + BIT_DStream_t bitD4; + size_t const length1 = MEM_readLE16(istart); + size_t const length2 = MEM_readLE16(istart+2); + size_t const length3 = MEM_readLE16(istart+4); + size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); + const BYTE* const istart1 = istart + 6; /* jumpTable */ + const BYTE* const istart2 = istart1 + length1; + const BYTE* const istart3 = istart2 + length2; + const BYTE* const istart4 = istart3 + length3; + size_t const segmentSize = (dstSize+3) / 4; + BYTE* const opStart2 = ostart + segmentSize; + BYTE* const opStart3 = opStart2 + segmentSize; + BYTE* const opStart4 = opStart3 + segmentSize; + BYTE* op1 = ostart; + BYTE* op2 = opStart2; + BYTE* op3 = opStart3; + BYTE* op4 = opStart4; + U32 endSignal = 1; + DTableDesc const dtd = HUF_getDTableDesc(DTable); + U32 const dtLog = dtd.tableLog; + + if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */ + CHECK_F( BIT_initDStream(&bitD1, istart1, length1) ); + CHECK_F( BIT_initDStream(&bitD2, istart2, length2) ); + CHECK_F( BIT_initDStream(&bitD3, istart3, length3) ); + CHECK_F( BIT_initDStream(&bitD4, istart4, length4) ); + + /* 16-32 symbols per loop (4-8 symbols per stream) */ + for ( ; (endSignal) & (op4 < olimit); ) { +#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished; + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished; + endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished; +#else + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_1(op1, &bitD1); + HUF_DECODE_SYMBOLX2_1(op2, &bitD2); + HUF_DECODE_SYMBOLX2_1(op3, &bitD3); + HUF_DECODE_SYMBOLX2_1(op4, &bitD4); + HUF_DECODE_SYMBOLX2_2(op1, &bitD1); + HUF_DECODE_SYMBOLX2_2(op2, &bitD2); + HUF_DECODE_SYMBOLX2_2(op3, &bitD3); + HUF_DECODE_SYMBOLX2_2(op4, &bitD4); + HUF_DECODE_SYMBOLX2_0(op1, &bitD1); + HUF_DECODE_SYMBOLX2_0(op2, &bitD2); + HUF_DECODE_SYMBOLX2_0(op3, &bitD3); + HUF_DECODE_SYMBOLX2_0(op4, &bitD4); + endSignal = (U32)LIKELY( + (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished) + & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished)); +#endif + } + + /* check corruption */ + if (op1 > opStart2) return ERROR(corruption_detected); + if (op2 > opStart3) return ERROR(corruption_detected); + if (op3 > opStart4) return ERROR(corruption_detected); + /* note : op4 already verified within main loop */ + + /* finish bitStreams one by one */ + HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); + HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); + HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); + HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); + + /* check */ + { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); + if (!endCheck) return ERROR(corruption_detected); } + + /* decoded size */ + return dstSize; + } +} + +HUF_DGEN(HUF_decompress1X2_usingDTable_internal) +HUF_DGEN(HUF_decompress4X2_usingDTable_internal) + +size_t HUF_decompress1X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0); +} + + +size_t HUF_decompress4X2_usingDTable( + void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc dtd = HUF_getDTableDesc(DTable); + if (dtd.tableType != 1) return ERROR(GENERIC); + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +} + +static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, + workSpace, wkspSize); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} + +size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0); +} + + +#endif /* HUF_FORCE_DECOMPRESS_X1 */ + + +/* ***********************************/ +/* Universal decompression selectors */ +/* ***********************************/ + +size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + +size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, + const void* cSrc, size_t cSrcSize, + const HUF_DTable* DTable) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0); +#endif +} + + +#if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2) +typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t; +static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = +{ + /* single, double, quad */ + {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */ + {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */ + {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ + {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ + {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ + {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ + {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ + {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ + {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ + {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ + {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ + {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ + {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ + {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */ + {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */ + {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */ +}; +#endif + +/** HUF_selectDecoder() : + * Tells which decoder is likely to decode faster, + * based on a set of pre-computed metrics. + * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 . + * Assumption : 0 < dstSize <= 128 KB */ +U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize) +{ + assert(dstSize > 0); + assert(dstSize <= 128*1024); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dstSize; + (void)cSrcSize; + return 0; +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dstSize; + (void)cSrcSize; + return 1; +#else + /* decoder timing evaluation */ + { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */ + U32 const D256 = (U32)(dstSize >> 8); + U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); + U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); + DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, to reduce cache eviction */ + return DTime1 < DTime0; + } +#endif +} + + +size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, + size_t dstSize, const void* cSrc, + size_t cSrcSize, void* workSpace, + size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize); +#endif + } +} + +size_t HUF_decompress1X_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, + const void* cSrc, size_t cSrcSize, + void* workSpace, size_t wkspSize) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */ + if (cSrcSize == dstSize) { ZSTD_memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */ + if (cSrcSize == 1) { ZSTD_memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */ + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#else + return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize): + HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc, + cSrcSize, workSpace, wkspSize); +#endif + } +} + + +size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +#ifndef HUF_FORCE_DECOMPRESS_X2 +size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + const BYTE* ip = (const BYTE*) cSrc; + + size_t const hSize = HUF_readDTableX1_wksp_bmi2(dctx, cSrc, cSrcSize, workSpace, wkspSize, bmi2); + if (HUF_isError(hSize)) return hSize; + if (hSize >= cSrcSize) return ERROR(srcSize_wrong); + ip += hSize; cSrcSize -= hSize; + + return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2); +} +#endif + +size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2) +{ + DTableDesc const dtd = HUF_getDTableDesc(DTable); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)dtd; + assert(dtd.tableType == 0); + return HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)dtd; + assert(dtd.tableType == 1); + return HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#else + return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) : + HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2); +#endif +} + +size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2) +{ + /* validation checks */ + if (dstSize == 0) return ERROR(dstSize_tooSmall); + if (cSrcSize == 0) return ERROR(corruption_detected); + + { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); +#if defined(HUF_FORCE_DECOMPRESS_X1) + (void)algoNb; + assert(algoNb == 0); + return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#elif defined(HUF_FORCE_DECOMPRESS_X2) + (void)algoNb; + assert(algoNb == 1); + return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#else + return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) : + HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2); +#endif + } +} + diff --git a/lib/zstd/decompress/zstd_ddict.c b/lib/zstd/decompress/zstd_ddict.c new file mode 100644 index 000000000000..dbbc7919de53 --- /dev/null +++ b/lib/zstd/decompress/zstd_ddict.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_ddict.c : + * concentrates all logic that needs to know the internals of ZSTD_DDict object */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "zstd_decompress_internal.h" +#include "zstd_ddict.h" + + + + +/*-******************************************************* +* Types +*********************************************************/ +struct ZSTD_DDict_s { + void* dictBuffer; + const void* dictContent; + size_t dictSize; + ZSTD_entropyDTables_t entropy; + U32 dictID; + U32 entropyPresent; + ZSTD_customMem cMem; +}; /* typedef'd to ZSTD_DDict within "zstd.h" */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictContent; +} + +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) +{ + assert(ddict != NULL); + return ddict->dictSize; +} + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_copyDDictParameters"); + assert(dctx != NULL); + assert(ddict != NULL); + dctx->dictID = ddict->dictID; + dctx->prefixStart = ddict->dictContent; + dctx->virtualStart = ddict->dictContent; + dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; + dctx->previousDstEnd = dctx->dictEnd; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + if (ddict->entropyPresent) { + dctx->litEntropy = 1; + dctx->fseEntropy = 1; + dctx->LLTptr = ddict->entropy.LLTable; + dctx->MLTptr = ddict->entropy.MLTable; + dctx->OFTptr = ddict->entropy.OFTable; + dctx->HUFptr = ddict->entropy.hufTable; + dctx->entropy.rep[0] = ddict->entropy.rep[0]; + dctx->entropy.rep[1] = ddict->entropy.rep[1]; + dctx->entropy.rep[2] = ddict->entropy.rep[2]; + } else { + dctx->litEntropy = 0; + dctx->fseEntropy = 0; + } +} + + +static size_t +ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, + ZSTD_dictContentType_e dictContentType) +{ + ddict->dictID = 0; + ddict->entropyPresent = 0; + if (dictContentType == ZSTD_dct_rawContent) return 0; + + if (ddict->dictSize < 8) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + { U32 const magic = MEM_readLE32(ddict->dictContent); + if (magic != ZSTD_MAGIC_DICTIONARY) { + if (dictContentType == ZSTD_dct_fullDict) + return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ + return 0; /* pure content mode */ + } + } + ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( + &ddict->entropy, ddict->dictContent, ddict->dictSize)), + dictionary_corrupted, ""); + ddict->entropyPresent = 1; + return 0; +} + + +static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { + ddict->dictBuffer = NULL; + ddict->dictContent = dict; + if (!dict) dictSize = 0; + } else { + void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); + ddict->dictBuffer = internalBuffer; + ddict->dictContent = internalBuffer; + if (!internalBuffer) return ERROR(memory_allocation); + ZSTD_memcpy(internalBuffer, dict, dictSize); + } + ddict->dictSize = dictSize; + ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + + /* parse dictionary content */ + FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , ""); + + return 0; +} + +ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType, + ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); + if (ddict == NULL) return NULL; + ddict->cMem = customMem; + { size_t const initResult = ZSTD_initDDict_internal(ddict, + dict, dictSize, + dictLoadMethod, dictContentType); + if (ZSTD_isError(initResult)) { + ZSTD_freeDDict(ddict); + return NULL; + } } + return ddict; + } +} + +/*! ZSTD_createDDict() : +* Create a digested dictionary, to start decompression without startup delay. +* `dict` content is copied inside DDict. +* Consequently, `dict` can be released after `ZSTD_DDict` creation */ +ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); +} + +/*! ZSTD_createDDict_byReference() : + * Create a digested dictionary, to start decompression without startup delay. + * Dictionary content is simply referenced, it will be accessed during decompression. + * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ +ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) +{ + ZSTD_customMem const allocator = { NULL, NULL, NULL }; + return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); +} + + +const ZSTD_DDict* ZSTD_initStaticDDict( + void* sBuffer, size_t sBufferSize, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + size_t const neededSpace = sizeof(ZSTD_DDict) + + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); + ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; + assert(sBuffer != NULL); + assert(dict != NULL); + if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ + if (sBufferSize < neededSpace) return NULL; + if (dictLoadMethod == ZSTD_dlm_byCopy) { + ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ + dict = ddict+1; + } + if (ZSTD_isError( ZSTD_initDDict_internal(ddict, + dict, dictSize, + ZSTD_dlm_byRef, dictContentType) )) + return NULL; + return ddict; +} + + +size_t ZSTD_freeDDict(ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support free on NULL */ + { ZSTD_customMem const cMem = ddict->cMem; + ZSTD_customFree(ddict->dictBuffer, cMem); + ZSTD_customFree(ddict, cMem); + return 0; + } +} + +/*! ZSTD_estimateDDictSize() : + * Estimate amount of memory that will be needed to create a dictionary for decompression. + * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ +size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) +{ + return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); +} + +size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; /* support sizeof on NULL */ + return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; +} + +/*! ZSTD_getDictID_fromDDict() : + * Provides the dictID of the dictionary loaded into `ddict`. + * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. + * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ +unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) +{ + if (ddict==NULL) return 0; + return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize); +} diff --git a/lib/zstd/decompress/zstd_ddict.h b/lib/zstd/decompress/zstd_ddict.h new file mode 100644 index 000000000000..8c1a79d666f8 --- /dev/null +++ b/lib/zstd/decompress/zstd_ddict.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DDICT_H +#define ZSTD_DDICT_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/zstd_deps.h" /* size_t */ +#include /* ZSTD_DDict, and several public functions */ + + +/*-******************************************************* + * Interface + *********************************************************/ + +/* note: several prototypes are already published in `zstd.h` : + * ZSTD_createDDict() + * ZSTD_createDDict_byReference() + * ZSTD_createDDict_advanced() + * ZSTD_freeDDict() + * ZSTD_initStaticDDict() + * ZSTD_sizeof_DDict() + * ZSTD_estimateDDictSize() + * ZSTD_getDictID_fromDict() + */ + +const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict); +size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict); + +void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict); + + + +#endif /* ZSTD_DDICT_H */ diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c new file mode 100644 index 000000000000..16b4ea795a7e --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress.c @@ -0,0 +1,2075 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* *************************************************************** +* Tuning parameters +*****************************************************************/ +/*! + * HEAPMODE : + * Select how default decompression function ZSTD_decompress() allocates its context, + * on stack (0), or into heap (1, default; requires malloc()). + * Note that functions with explicit context such as ZSTD_decompressDCtx() are unaffected. + */ +#ifndef ZSTD_HEAPMODE +# define ZSTD_HEAPMODE 1 +#endif + +/*! +* LEGACY_SUPPORT : +* if set to 1+, ZSTD_decompress() can decode older formats (v0.1+) +*/ + +/*! + * MAXWINDOWSIZE_DEFAULT : + * maximum window size accepted by DStream __by default__. + * Frames requiring more memory will be rejected. + * It's possible to set a different limit using ZSTD_DCtx_setMaxWindowSize(). + */ +#ifndef ZSTD_MAXWINDOWSIZE_DEFAULT +# define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) + 1) +#endif + +/*! + * NO_FORWARD_PROGRESS_MAX : + * maximum allowed nb of calls to ZSTD_decompressStream() + * without any forward progress + * (defined as: no byte read from input, and no byte flushed to output) + * before triggering an error. + */ +#ifndef ZSTD_NO_FORWARD_PROGRESS_MAX +# define ZSTD_NO_FORWARD_PROGRESS_MAX 16 +#endif + + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include /* xxh64_reset, xxh64_update, xxh64_digest, XXH64 */ +#include "../common/zstd_internal.h" /* blockProperties_t */ +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" /* ZSTD_decompressBlock_internal */ + + + + +/************************************* + * Multiple DDicts Hashset internals * + *************************************/ + +#define DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT 4 +#define DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT 3 /* These two constants represent SIZE_MULT/COUNT_MULT load factor without using a float. + * Currently, that means a 0.75 load factor. + * So, if count * COUNT_MULT / size * SIZE_MULT != 0, then we've exceeded + * the load factor of the ddict hash set. + */ + +#define DDICT_HASHSET_TABLE_BASE_SIZE 64 +#define DDICT_HASHSET_RESIZE_FACTOR 2 + +/* Hash function to determine starting position of dict insertion within the table + * Returns an index between [0, hashSet->ddictPtrTableSize] + */ +static size_t ZSTD_DDictHashSet_getIndex(const ZSTD_DDictHashSet* hashSet, U32 dictID) { + const U64 hash = xxh64(&dictID, sizeof(U32), 0); + /* DDict ptr table size is a multiple of 2, use size - 1 as mask to get index within [0, hashSet->ddictPtrTableSize) */ + return hash & (hashSet->ddictPtrTableSize - 1); +} + +/* Adds DDict to a hashset without resizing it. + * If inserting a DDict with a dictID that already exists in the set, replaces the one in the set. + * Returns 0 if successful, or a zstd error code if something went wrong. + */ +static size_t ZSTD_DDictHashSet_emplaceDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict) { + const U32 dictID = ZSTD_getDictID_fromDDict(ddict); + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + RETURN_ERROR_IF(hashSet->ddictPtrCount == hashSet->ddictPtrTableSize, GENERIC, "Hash set is full!"); + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + while (hashSet->ddictPtrTable[idx] != NULL) { + /* Replace existing ddict if inserting ddict with same dictID */ + if (ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]) == dictID) { + DEBUGLOG(4, "DictID already exists, replacing rather than adding"); + hashSet->ddictPtrTable[idx] = ddict; + return 0; + } + idx &= idxRangeMask; + idx++; + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + hashSet->ddictPtrTable[idx] = ddict; + hashSet->ddictPtrCount++; + return 0; +} + +/* Expands hash table by factor of DDICT_HASHSET_RESIZE_FACTOR and + * rehashes all values, allocates new table, frees old table. + * Returns 0 on success, otherwise a zstd error code. + */ +static size_t ZSTD_DDictHashSet_expand(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + size_t newTableSize = hashSet->ddictPtrTableSize * DDICT_HASHSET_RESIZE_FACTOR; + const ZSTD_DDict** newTable = (const ZSTD_DDict**)ZSTD_customCalloc(sizeof(ZSTD_DDict*) * newTableSize, customMem); + const ZSTD_DDict** oldTable = hashSet->ddictPtrTable; + size_t oldTableSize = hashSet->ddictPtrTableSize; + size_t i; + + DEBUGLOG(4, "Expanding DDict hash table! Old size: %zu new size: %zu", oldTableSize, newTableSize); + RETURN_ERROR_IF(!newTable, memory_allocation, "Expanded hashset allocation failed!"); + hashSet->ddictPtrTable = newTable; + hashSet->ddictPtrTableSize = newTableSize; + hashSet->ddictPtrCount = 0; + for (i = 0; i < oldTableSize; ++i) { + if (oldTable[i] != NULL) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, oldTable[i]), ""); + } + } + ZSTD_customFree((void*)oldTable, customMem); + DEBUGLOG(4, "Finished re-hash"); + return 0; +} + +/* Fetches a DDict with the given dictID + * Returns the ZSTD_DDict* with the requested dictID. If it doesn't exist, then returns NULL. + */ +static const ZSTD_DDict* ZSTD_DDictHashSet_getDDict(ZSTD_DDictHashSet* hashSet, U32 dictID) { + size_t idx = ZSTD_DDictHashSet_getIndex(hashSet, dictID); + const size_t idxRangeMask = hashSet->ddictPtrTableSize - 1; + DEBUGLOG(4, "Hashed index: for dictID: %u is %zu", dictID, idx); + for (;;) { + size_t currDictID = ZSTD_getDictID_fromDDict(hashSet->ddictPtrTable[idx]); + if (currDictID == dictID || currDictID == 0) { + /* currDictID == 0 implies a NULL ddict entry */ + break; + } else { + idx &= idxRangeMask; /* Goes to start of table when we reach the end */ + idx++; + } + } + DEBUGLOG(4, "Final idx after probing for dictID %u is: %zu", dictID, idx); + return hashSet->ddictPtrTable[idx]; +} + +/* Allocates space for and returns a ddict hash set + * The hash set's ZSTD_DDict* table has all values automatically set to NULL to begin with. + * Returns NULL if allocation failed. + */ +static ZSTD_DDictHashSet* ZSTD_createDDictHashSet(ZSTD_customMem customMem) { + ZSTD_DDictHashSet* ret = (ZSTD_DDictHashSet*)ZSTD_customMalloc(sizeof(ZSTD_DDictHashSet), customMem); + DEBUGLOG(4, "Allocating new hash set"); + ret->ddictPtrTable = (const ZSTD_DDict**)ZSTD_customCalloc(DDICT_HASHSET_TABLE_BASE_SIZE * sizeof(ZSTD_DDict*), customMem); + ret->ddictPtrTableSize = DDICT_HASHSET_TABLE_BASE_SIZE; + ret->ddictPtrCount = 0; + if (!ret || !ret->ddictPtrTable) { + return NULL; + } + return ret; +} + +/* Frees the table of ZSTD_DDict* within a hashset, then frees the hashset itself. + * Note: The ZSTD_DDict* within the table are NOT freed. + */ +static void ZSTD_freeDDictHashSet(ZSTD_DDictHashSet* hashSet, ZSTD_customMem customMem) { + DEBUGLOG(4, "Freeing ddict hash set"); + if (hashSet && hashSet->ddictPtrTable) { + ZSTD_customFree((void*)hashSet->ddictPtrTable, customMem); + } + if (hashSet) { + ZSTD_customFree(hashSet, customMem); + } +} + +/* Public function: Adds a DDict into the ZSTD_DDictHashSet, possibly triggering a resize of the hash set. + * Returns 0 on success, or a ZSTD error. + */ +static size_t ZSTD_DDictHashSet_addDDict(ZSTD_DDictHashSet* hashSet, const ZSTD_DDict* ddict, ZSTD_customMem customMem) { + DEBUGLOG(4, "Adding dict ID: %u to hashset with - Count: %zu Tablesize: %zu", ZSTD_getDictID_fromDDict(ddict), hashSet->ddictPtrCount, hashSet->ddictPtrTableSize); + if (hashSet->ddictPtrCount * DDICT_HASHSET_MAX_LOAD_FACTOR_COUNT_MULT / hashSet->ddictPtrTableSize * DDICT_HASHSET_MAX_LOAD_FACTOR_SIZE_MULT != 0) { + FORWARD_IF_ERROR(ZSTD_DDictHashSet_expand(hashSet, customMem), ""); + } + FORWARD_IF_ERROR(ZSTD_DDictHashSet_emplaceDDict(hashSet, ddict), ""); + return 0; +} + +/*-************************************************************* +* Context management +***************************************************************/ +size_t ZSTD_sizeof_DCtx (const ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support sizeof NULL */ + return sizeof(*dctx) + + ZSTD_sizeof_DDict(dctx->ddictLocal) + + dctx->inBuffSize + dctx->outBuffSize; +} + +size_t ZSTD_estimateDCtxSize(void) { return sizeof(ZSTD_DCtx); } + + +static size_t ZSTD_startingInputLength(ZSTD_format_e format) +{ + size_t const startingInputLength = ZSTD_FRAMEHEADERSIZE_PREFIX(format); + /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */ + assert( (format == ZSTD_f_zstd1) || (format == ZSTD_f_zstd1_magicless) ); + return startingInputLength; +} + +static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx) +{ + assert(dctx->streamStage == zdss_init); + dctx->format = ZSTD_f_zstd1; + dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; + dctx->outBufferMode = ZSTD_bm_buffered; + dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum; + dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict; +} + +static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx) +{ + dctx->staticSize = 0; + dctx->ddict = NULL; + dctx->ddictLocal = NULL; + dctx->dictEnd = NULL; + dctx->ddictIsCold = 0; + dctx->dictUses = ZSTD_dont_use; + dctx->inBuff = NULL; + dctx->inBuffSize = 0; + dctx->outBuffSize = 0; + dctx->streamStage = zdss_init; + dctx->legacyContext = NULL; + dctx->previousLegacyVersion = 0; + dctx->noForwardProgress = 0; + dctx->oversizedDuration = 0; + dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid()); + dctx->ddictSet = NULL; + ZSTD_DCtx_resetParameters(dctx); +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentEndForFuzzing = NULL; +#endif +} + +ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize) +{ + ZSTD_DCtx* const dctx = (ZSTD_DCtx*) workspace; + + if ((size_t)workspace & 7) return NULL; /* 8-aligned */ + if (workspaceSize < sizeof(ZSTD_DCtx)) return NULL; /* minimum size */ + + ZSTD_initDCtx_internal(dctx); + dctx->staticSize = workspaceSize; + dctx->inBuff = (char*)(dctx+1); + return dctx; +} + +ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem) +{ + if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; + + { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem); + if (!dctx) return NULL; + dctx->customMem = customMem; + ZSTD_initDCtx_internal(dctx); + return dctx; + } +} + +ZSTD_DCtx* ZSTD_createDCtx(void) +{ + DEBUGLOG(3, "ZSTD_createDCtx"); + return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); +} + +static void ZSTD_clearDict(ZSTD_DCtx* dctx) +{ + ZSTD_freeDDict(dctx->ddictLocal); + dctx->ddictLocal = NULL; + dctx->ddict = NULL; + dctx->dictUses = ZSTD_dont_use; +} + +size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) +{ + if (dctx==NULL) return 0; /* support free on NULL */ + RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); + { ZSTD_customMem const cMem = dctx->customMem; + ZSTD_clearDict(dctx); + ZSTD_customFree(dctx->inBuff, cMem); + dctx->inBuff = NULL; + if (dctx->ddictSet) { + ZSTD_freeDDictHashSet(dctx->ddictSet, cMem); + dctx->ddictSet = NULL; + } + ZSTD_customFree(dctx, cMem); + return 0; + } +} + +/* no longer useful */ +void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, const ZSTD_DCtx* srcDCtx) +{ + size_t const toCopy = (size_t)((char*)(&dstDCtx->inBuff) - (char*)dstDCtx); + ZSTD_memcpy(dstDCtx, srcDCtx, toCopy); /* no need to copy workspace */ +} + +/* Given a dctx with a digested frame params, re-selects the correct ZSTD_DDict based on + * the requested dict ID from the frame. If there exists a reference to the correct ZSTD_DDict, then + * accordingly sets the ddict to be used to decompress the frame. + * + * If no DDict is found, then no action is taken, and the ZSTD_DCtx::ddict remains as-is. + * + * ZSTD_d_refMultipleDDicts must be enabled for this function to be called. + */ +static void ZSTD_DCtx_selectFrameDDict(ZSTD_DCtx* dctx) { + assert(dctx->refMultipleDDicts && dctx->ddictSet); + DEBUGLOG(4, "Adjusting DDict based on requested dict ID from frame"); + if (dctx->ddict) { + const ZSTD_DDict* frameDDict = ZSTD_DDictHashSet_getDDict(dctx->ddictSet, dctx->fParams.dictID); + if (frameDDict) { + DEBUGLOG(4, "DDict found!"); + ZSTD_clearDict(dctx); + dctx->dictID = dctx->fParams.dictID; + dctx->ddict = frameDDict; + dctx->dictUses = ZSTD_use_indefinitely; + } + } +} + + +/*-************************************************************* + * Frame header decoding + ***************************************************************/ + +/*! ZSTD_isFrame() : + * Tells if the content of `buffer` starts with a valid Frame Identifier. + * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. + * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. + * Note 3 : Skippable Frame Identifiers are considered valid. */ +unsigned ZSTD_isFrame(const void* buffer, size_t size) +{ + if (size < ZSTD_FRAMEIDSIZE) return 0; + { U32 const magic = MEM_readLE32(buffer); + if (magic == ZSTD_MAGICNUMBER) return 1; + if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1; + } + return 0; +} + +/** ZSTD_frameHeaderSize_internal() : + * srcSize must be large enough to reach header size fields. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless. + * @return : size of the Frame Header + * or an error code, which can be tested with ZSTD_isError() */ +static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) +{ + size_t const minInputSize = ZSTD_startingInputLength(format); + RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong, ""); + + { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; + U32 const dictID= fhd & 3; + U32 const singleSegment = (fhd >> 5) & 1; + U32 const fcsId = fhd >> 6; + return minInputSize + !singleSegment + + ZSTD_did_fieldSize[dictID] + ZSTD_fcs_fieldSize[fcsId] + + (singleSegment && !fcsId); + } +} + +/** ZSTD_frameHeaderSize() : + * srcSize must be >= ZSTD_frameHeaderSize_prefix. + * @return : size of the Frame Header, + * or an error code (if srcSize is too small) */ +size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize) +{ + return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameHeader_advanced() : + * decode Frame Header, or require larger `srcSize`. + * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format) +{ + const BYTE* ip = (const BYTE*)src; + size_t const minInputSize = ZSTD_startingInputLength(format); + + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ + if (srcSize < minInputSize) return minInputSize; + RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); + + if ( (format != ZSTD_f_zstd1_magicless) + && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + /* skippable frame */ + if (srcSize < ZSTD_SKIPPABLEHEADERSIZE) + return ZSTD_SKIPPABLEHEADERSIZE; /* magic number + frame length */ + ZSTD_memset(zfhPtr, 0, sizeof(*zfhPtr)); + zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE); + zfhPtr->frameType = ZSTD_skippableFrame; + return 0; + } + RETURN_ERROR(prefix_unknown, ""); + } + + /* ensure there is enough `srcSize` to fully read/decode frame header */ + { size_t const fhsize = ZSTD_frameHeaderSize_internal(src, srcSize, format); + if (srcSize < fhsize) return fhsize; + zfhPtr->headerSize = (U32)fhsize; + } + + { BYTE const fhdByte = ip[minInputSize-1]; + size_t pos = minInputSize; + U32 const dictIDSizeCode = fhdByte&3; + U32 const checksumFlag = (fhdByte>>2)&1; + U32 const singleSegment = (fhdByte>>5)&1; + U32 const fcsID = fhdByte>>6; + U64 windowSize = 0; + U32 dictID = 0; + U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; + RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, + "reserved bits, must be zero"); + + if (!singleSegment) { + BYTE const wlByte = ip[pos++]; + U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; + RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge, ""); + windowSize = (1ULL << windowLog); + windowSize += (windowSize >> 3) * (wlByte&7); + } + switch(dictIDSizeCode) + { + default: assert(0); /* impossible */ + case 0 : break; + case 1 : dictID = ip[pos]; pos++; break; + case 2 : dictID = MEM_readLE16(ip+pos); pos+=2; break; + case 3 : dictID = MEM_readLE32(ip+pos); pos+=4; break; + } + switch(fcsID) + { + default: assert(0); /* impossible */ + case 0 : if (singleSegment) frameContentSize = ip[pos]; break; + case 1 : frameContentSize = MEM_readLE16(ip+pos)+256; break; + case 2 : frameContentSize = MEM_readLE32(ip+pos); break; + case 3 : frameContentSize = MEM_readLE64(ip+pos); break; + } + if (singleSegment) windowSize = frameContentSize; + + zfhPtr->frameType = ZSTD_frame; + zfhPtr->frameContentSize = frameContentSize; + zfhPtr->windowSize = windowSize; + zfhPtr->blockSizeMax = (unsigned) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + zfhPtr->dictID = dictID; + zfhPtr->checksumFlag = checksumFlag; + } + return 0; +} + +/** ZSTD_getFrameHeader() : + * decode Frame Header, or require larger `srcSize`. + * note : this function does not consume input, it only reads it. + * @return : 0, `zfhPtr` is correctly filled, + * >0, `srcSize` is too small, value is wanted `srcSize` amount, + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize) +{ + return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1); +} + + +/** ZSTD_getFrameContentSize() : + * compatible with legacy mode + * @return : decompressed size of the single frame pointed to be `src` if known, otherwise + * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined + * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small) */ +unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) +{ + { ZSTD_frameHeader zfh; + if (ZSTD_getFrameHeader(&zfh, src, srcSize) != 0) + return ZSTD_CONTENTSIZE_ERROR; + if (zfh.frameType == ZSTD_skippableFrame) { + return 0; + } else { + return zfh.frameContentSize; + } } +} + +static size_t readSkippableFrameSize(void const* src, size_t srcSize) +{ + size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; + U32 sizeU32; + + RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong, ""); + + sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); + RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, + frameParameter_unsupported, ""); + { + size_t const skippableSize = skippableHeaderSize + sizeU32; + RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong, ""); + return skippableSize; + } +} + +/** ZSTD_findDecompressedSize() : + * compatible with legacy mode + * `srcSize` must be the exact length of some number of ZSTD compressed and/or + * skippable frames + * @return : decompressed size of the frames contained */ +unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long totalDstSize = 0; + + while (srcSize >= ZSTD_startingInputLength(ZSTD_f_zstd1)) { + U32 const magicNumber = MEM_readLE32(src); + + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + if (ZSTD_isError(skippableSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } + + { unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + if (ret >= ZSTD_CONTENTSIZE_ERROR) return ret; + + /* check for overflow */ + if (totalDstSize + ret < totalDstSize) return ZSTD_CONTENTSIZE_ERROR; + totalDstSize += ret; + } + { size_t const frameSrcSize = ZSTD_findFrameCompressedSize(src, srcSize); + if (ZSTD_isError(frameSrcSize)) { + return ZSTD_CONTENTSIZE_ERROR; + } + + src = (const BYTE *)src + frameSrcSize; + srcSize -= frameSrcSize; + } + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + if (srcSize) return ZSTD_CONTENTSIZE_ERROR; + + return totalDstSize; +} + +/** ZSTD_getDecompressedSize() : + * compatible with legacy mode + * @return : decompressed size if known, 0 otherwise + note : 0 can mean any of the following : + - frame content is empty + - decompressed size field is not present in frame header + - frame header unknown / not supported + - frame header not complete (`srcSize` too small) */ +unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize) +{ + unsigned long long const ret = ZSTD_getFrameContentSize(src, srcSize); + ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_ERROR < ZSTD_CONTENTSIZE_UNKNOWN); + return (ret >= ZSTD_CONTENTSIZE_ERROR) ? 0 : ret; +} + + +/** ZSTD_decodeFrameHeader() : + * `headerSize` must be the size provided by ZSTD_frameHeaderSize(). + * If multiple DDict references are enabled, also will choose the correct DDict to use. + * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */ +static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize) +{ + size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); + if (ZSTD_isError(result)) return result; /* invalid header */ + RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); + + /* Reference DDict requested by frame if dctx references multiple ddicts */ + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts && dctx->ddictSet) { + ZSTD_DCtx_selectFrameDDict(dctx); + } + +#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + /* Skip the dictID check in fuzzing mode, because it makes the search + * harder. + */ + RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), + dictionary_wrong, ""); +#endif + dctx->validateChecksum = (dctx->fParams.checksumFlag && !dctx->forceIgnoreChecksum) ? 1 : 0; + if (dctx->validateChecksum) xxh64_reset(&dctx->xxhState, 0); + dctx->processedCSize += headerSize; + return 0; +} + +static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) +{ + ZSTD_frameSizeInfo frameSizeInfo; + frameSizeInfo.compressedSize = ret; + frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; + return frameSizeInfo; +} + +static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) +{ + ZSTD_frameSizeInfo frameSizeInfo; + ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); + + + if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) + && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); + assert(ZSTD_isError(frameSizeInfo.compressedSize) || + frameSizeInfo.compressedSize <= srcSize); + return frameSizeInfo; + } else { + const BYTE* ip = (const BYTE*)src; + const BYTE* const ipstart = ip; + size_t remainingSize = srcSize; + size_t nbBlocks = 0; + ZSTD_frameHeader zfh; + + /* Extract Frame Header */ + { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(ret)) + return ZSTD_errorFrameSizeInfo(ret); + if (ret > 0) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + } + + ip += zfh.headerSize; + remainingSize -= zfh.headerSize; + + /* Iterate over each block */ + while (1) { + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) + return ZSTD_errorFrameSizeInfo(cBlockSize); + + if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + + ip += ZSTD_blockHeaderSize + cBlockSize; + remainingSize -= ZSTD_blockHeaderSize + cBlockSize; + nbBlocks++; + + if (blockProperties.lastBlock) break; + } + + /* Final frame content checksum */ + if (zfh.checksumFlag) { + if (remainingSize < 4) + return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); + ip += 4; + } + + frameSizeInfo.compressedSize = (size_t)(ip - ipstart); + frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) + ? zfh.frameContentSize + : nbBlocks * zfh.blockSizeMax; + return frameSizeInfo; + } +} + +/** ZSTD_findFrameCompressedSize() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame + * `srcSize` must be at least as large as the frame contained + * @return : the compressed size of the frame starting at `src` */ +size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) +{ + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + return frameSizeInfo.compressedSize; +} + +/** ZSTD_decompressBound() : + * compatible with legacy mode + * `src` must point to the start of a ZSTD frame or a skippeable frame + * `srcSize` must be at least as large as the frame contained + * @return : the maximum decompressed size of the compressed source + */ +unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) +{ + unsigned long long bound = 0; + /* Iterate over each frame */ + while (srcSize > 0) { + ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); + size_t const compressedSize = frameSizeInfo.compressedSize; + unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; + if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) + return ZSTD_CONTENTSIZE_ERROR; + assert(srcSize >= compressedSize); + src = (const BYTE*)src + compressedSize; + srcSize -= compressedSize; + bound += decompressedBound; + } + return bound; +} + + +/*-************************************************************* + * Frame decoding + ***************************************************************/ + +/** ZSTD_insertBlock() : + * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ +size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) +{ + DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); + ZSTD_checkContinuity(dctx, blockStart, blockSize); + dctx->previousDstEnd = (const char*)blockStart + blockSize; + return blockSize; +} + + +static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_copyRawBlock"); + RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (srcSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memcpy(dst, src, srcSize); + return srcSize; +} + +static size_t ZSTD_setRleBlock(void* dst, size_t dstCapacity, + BYTE b, + size_t regenSize) +{ + RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall, ""); + if (dst == NULL) { + if (regenSize == 0) return 0; + RETURN_ERROR(dstBuffer_null, ""); + } + ZSTD_memset(dst, b, regenSize); + return regenSize; +} + +static void ZSTD_DCtx_trace_end(ZSTD_DCtx const* dctx, U64 uncompressedSize, U64 compressedSize, unsigned streaming) +{ + (void)dctx; + (void)uncompressedSize; + (void)compressedSize; + (void)streaming; +} + + +/*! ZSTD_decompressFrame() : + * @dctx must be properly initialized + * will update *srcPtr and *srcSizePtr, + * to make *srcPtr progress by one frame. */ +static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void** srcPtr, size_t *srcSizePtr) +{ + const BYTE* const istart = (const BYTE*)(*srcPtr); + const BYTE* ip = istart; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = dstCapacity != 0 ? ostart + dstCapacity : ostart; + BYTE* op = ostart; + size_t remainingSrcSize = *srcSizePtr; + + DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); + + /* check */ + RETURN_ERROR_IF( + remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN(dctx->format)+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + + /* Frame Header */ + { size_t const frameHeaderSize = ZSTD_frameHeaderSize_internal( + ip, ZSTD_FRAMEHEADERSIZE_PREFIX(dctx->format), dctx->format); + if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; + RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, + srcSize_wrong, ""); + FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) , ""); + ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; + } + + /* Loop on each block */ + while (1) { + size_t decodedSize; + blockProperties_t blockProperties; + size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSrcSize, &blockProperties); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + + ip += ZSTD_blockHeaderSize; + remainingSrcSize -= ZSTD_blockHeaderSize; + RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong, ""); + + switch(blockProperties.blockType) + { + case bt_compressed: + decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1); + break; + case bt_raw : + decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize); + break; + case bt_rle : + decodedSize = ZSTD_setRleBlock(op, (size_t)(oend-op), *ip, blockProperties.origSize); + break; + case bt_reserved : + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + + if (ZSTD_isError(decodedSize)) return decodedSize; + if (dctx->validateChecksum) + xxh64_update(&dctx->xxhState, op, decodedSize); + if (decodedSize != 0) + op += decodedSize; + assert(ip != NULL); + ip += cBlockSize; + remainingSrcSize -= cBlockSize; + if (blockProperties.lastBlock) break; + } + + if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { + RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, + corruption_detected, ""); + } + if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ + RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong, ""); + if (!dctx->forceIgnoreChecksum) { + U32 const checkCalc = (U32)xxh64_digest(&dctx->xxhState); + U32 checkRead; + checkRead = MEM_readLE32(ip); + RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong, ""); + } + ip += 4; + remainingSrcSize -= 4; + } + ZSTD_DCtx_trace_end(dctx, (U64)(op-ostart), (U64)(ip-istart), /* streaming */ 0); + /* Allow caller to get size read */ + *srcPtr = ip; + *srcSizePtr = remainingSrcSize; + return (size_t)(op-ostart); +} + +static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize, + const ZSTD_DDict* ddict) +{ + void* const dststart = dst; + int moreThan1Frame = 0; + + DEBUGLOG(5, "ZSTD_decompressMultiFrame"); + assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */ + + if (ddict) { + dict = ZSTD_DDict_dictContent(ddict); + dictSize = ZSTD_DDict_dictSize(ddict); + } + + while (srcSize >= ZSTD_startingInputLength(dctx->format)) { + + + { U32 const magicNumber = MEM_readLE32(src); + DEBUGLOG(4, "reading magic number %08X (expecting %08X)", + (unsigned)magicNumber, ZSTD_MAGICNUMBER); + if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { + size_t const skippableSize = readSkippableFrameSize(src, srcSize); + FORWARD_IF_ERROR(skippableSize, "readSkippableFrameSize failed"); + assert(skippableSize <= srcSize); + + src = (const BYTE *)src + skippableSize; + srcSize -= skippableSize; + continue; + } } + + if (ddict) { + /* we were called from ZSTD_decompress_usingDDict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict), ""); + } else { + /* this will initialize correctly with no dict if dict == NULL, so + * use this in all cases but ddict */ + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize), ""); + } + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, + &src, &srcSize); + RETURN_ERROR_IF( + (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) + && (moreThan1Frame==1), + srcSize_wrong, + "At least one frame successfully completed, " + "but following bytes are garbage: " + "it's more likely to be a srcSize error, " + "specifying more input bytes than size of frame(s). " + "Note: one could be unlucky, it might be a corruption error instead, " + "happening right at the place where we expect zstd magic bytes. " + "But this is _much_ less likely than a srcSize field error."); + if (ZSTD_isError(res)) return res; + assert(res <= dstCapacity); + if (res != 0) + dst = (BYTE*)dst + res; + dstCapacity -= res; + } + moreThan1Frame = 1; + } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ + + RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); + + return (size_t)((BYTE*)dst - (BYTE*)dststart); +} + +size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const void* dict, size_t dictSize) +{ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, dict, dictSize, NULL); +} + + +static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) +{ + switch (dctx->dictUses) { + default: + assert(0 /* Impossible */); + /* fall-through */ + case ZSTD_dont_use: + ZSTD_clearDict(dctx); + return NULL; + case ZSTD_use_indefinitely: + return dctx->ddict; + case ZSTD_use_once: + dctx->dictUses = ZSTD_dont_use; + return dctx->ddict; + } +} + +size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); +} + + +size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ +#if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) + size_t regenSize; + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!"); + regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); + ZSTD_freeDCtx(dctx); + return regenSize; +#else /* stack mode */ + ZSTD_DCtx dctx; + ZSTD_initDCtx_internal(&dctx); + return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize); +#endif +} + + +/*-************************************** +* Advanced Streaming Decompression API +* Bufferless and synchronous +****************************************/ +size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; } + +/** + * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed, + * we allow taking a partial block as the input. Currently only raw uncompressed blocks can + * be streamed. + * + * For blocks that can be streamed, this allows us to reduce the latency until we produce + * output, and avoid copying the input. + * + * @param inputSize - The total amount of input that the caller currently has. + */ +static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t inputSize) { + if (!(dctx->stage == ZSTDds_decompressBlock || dctx->stage == ZSTDds_decompressLastBlock)) + return dctx->expected; + if (dctx->bType != bt_raw) + return dctx->expected; + return MIN(MAX(inputSize, 1), dctx->expected); +} + +ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) { + switch(dctx->stage) + { + default: /* should not happen */ + assert(0); + case ZSTDds_getFrameHeaderSize: + case ZSTDds_decodeFrameHeader: + return ZSTDnit_frameHeader; + case ZSTDds_decodeBlockHeader: + return ZSTDnit_blockHeader; + case ZSTDds_decompressBlock: + return ZSTDnit_block; + case ZSTDds_decompressLastBlock: + return ZSTDnit_lastBlock; + case ZSTDds_checkChecksum: + return ZSTDnit_checksum; + case ZSTDds_decodeSkippableHeader: + case ZSTDds_skipFrame: + return ZSTDnit_skippableFrame; + } +} + +static int ZSTD_isSkipFrame(ZSTD_DCtx* dctx) { return dctx->stage == ZSTDds_skipFrame; } + +/** ZSTD_decompressContinue() : + * srcSize : must be the exact nb of bytes expected (see ZSTD_nextSrcSizeToDecompress()) + * @return : nb of bytes generated into `dst` (necessarily <= `dstCapacity) + * or an error code, which can be tested using ZSTD_isError() */ +size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) +{ + DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); + /* Sanity check */ + RETURN_ERROR_IF(srcSize != ZSTD_nextSrcSizeToDecompressWithInputSize(dctx, srcSize), srcSize_wrong, "not allowed"); + ZSTD_checkContinuity(dctx, dst, dstCapacity); + + dctx->processedCSize += srcSize; + + switch (dctx->stage) + { + case ZSTDds_getFrameHeaderSize : + assert(src != NULL); + if (dctx->format == ZSTD_f_zstd1) { /* allows header */ + assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */ + if ((MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = ZSTD_SKIPPABLEHEADERSIZE - srcSize; /* remaining to load to get full skippable frame header */ + dctx->stage = ZSTDds_decodeSkippableHeader; + return 0; + } } + dctx->headerSize = ZSTD_frameHeaderSize_internal(src, srcSize, dctx->format); + if (ZSTD_isError(dctx->headerSize)) return dctx->headerSize; + ZSTD_memcpy(dctx->headerBuffer, src, srcSize); + dctx->expected = dctx->headerSize - srcSize; + dctx->stage = ZSTDds_decodeFrameHeader; + return 0; + + case ZSTDds_decodeFrameHeader: + assert(src != NULL); + ZSTD_memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize), ""); + dctx->expected = ZSTD_blockHeaderSize; + dctx->stage = ZSTDds_decodeBlockHeader; + return 0; + + case ZSTDds_decodeBlockHeader: + { blockProperties_t bp; + size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); + if (ZSTD_isError(cBlockSize)) return cBlockSize; + RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); + dctx->expected = cBlockSize; + dctx->bType = bp.blockType; + dctx->rleSize = bp.origSize; + if (cBlockSize) { + dctx->stage = bp.lastBlock ? ZSTDds_decompressLastBlock : ZSTDds_decompressBlock; + return 0; + } + /* empty block */ + if (bp.lastBlock) { + if (dctx->fParams.checksumFlag) { + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + dctx->expected = 0; /* end of frame */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->expected = ZSTD_blockHeaderSize; /* jump to next header */ + dctx->stage = ZSTDds_decodeBlockHeader; + } + return 0; + } + + case ZSTDds_decompressLastBlock: + case ZSTDds_decompressBlock: + DEBUGLOG(5, "ZSTD_decompressContinue: case ZSTDds_decompressBlock"); + { size_t rSize; + switch(dctx->bType) + { + case bt_compressed: + DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed"); + rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_raw : + assert(srcSize <= dctx->expected); + rSize = ZSTD_copyRawBlock(dst, dstCapacity, src, srcSize); + FORWARD_IF_ERROR(rSize, "ZSTD_copyRawBlock failed"); + assert(rSize == srcSize); + dctx->expected -= rSize; + break; + case bt_rle : + rSize = ZSTD_setRleBlock(dst, dstCapacity, *(const BYTE*)src, dctx->rleSize); + dctx->expected = 0; /* Streaming not supported */ + break; + case bt_reserved : /* should never happen */ + default: + RETURN_ERROR(corruption_detected, "invalid block type"); + } + FORWARD_IF_ERROR(rSize, ""); + RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); + DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); + dctx->decodedSize += rSize; + if (dctx->validateChecksum) xxh64_update(&dctx->xxhState, dst, rSize); + dctx->previousDstEnd = (char*)dst + rSize; + + /* Stay on the same stage until we are finished streaming the block. */ + if (dctx->expected > 0) { + return rSize; + } + + if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ + DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); + RETURN_ERROR_IF( + dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && dctx->decodedSize != dctx->fParams.frameContentSize, + corruption_detected, ""); + if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ + dctx->expected = 4; + dctx->stage = ZSTDds_checkChecksum; + } else { + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; /* ends here */ + dctx->stage = ZSTDds_getFrameHeaderSize; + } + } else { + dctx->stage = ZSTDds_decodeBlockHeader; + dctx->expected = ZSTD_blockHeaderSize; + } + return rSize; + } + + case ZSTDds_checkChecksum: + assert(srcSize == 4); /* guaranteed by dctx->expected */ + { + if (dctx->validateChecksum) { + U32 const h32 = (U32)xxh64_digest(&dctx->xxhState); + U32 const check32 = MEM_readLE32(src); + DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); + RETURN_ERROR_IF(check32 != h32, checksum_wrong, ""); + } + ZSTD_DCtx_trace_end(dctx, dctx->decodedSize, dctx->processedCSize, /* streaming */ 1); + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + } + + case ZSTDds_decodeSkippableHeader: + assert(src != NULL); + assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE); + ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */ + dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */ + dctx->stage = ZSTDds_skipFrame; + return 0; + + case ZSTDds_skipFrame: + dctx->expected = 0; + dctx->stage = ZSTDds_getFrameHeaderSize; + return 0; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } +} + + +static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dict; + dctx->previousDstEnd = (const char*)dict + dictSize; +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + dctx->dictContentBeginForFuzzing = dctx->prefixStart; + dctx->dictContentEndForFuzzing = dctx->previousDstEnd; +#endif + return 0; +} + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of entropy tables read */ +size_t +ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize) +{ + const BYTE* dictPtr = (const BYTE*)dict; + const BYTE* const dictEnd = dictPtr + dictSize; + + RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted, "dict is too small"); + assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ + dictPtr += 8; /* skip header = magic + dictID */ + + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable)); + ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable)); + ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE); + { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */ + size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable); +#ifdef HUF_FORCE_DECOMPRESS_X1 + /* in minimal huffman, we always use X1 variants */ + size_t const hSize = HUF_readDTableX1_wksp(entropy->hufTable, + dictPtr, dictEnd - dictPtr, + workspace, workspaceSize); +#else + size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable, + dictPtr, (size_t)(dictEnd - dictPtr), + workspace, workspaceSize); +#endif + RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted, ""); + dictPtr += hSize; + } + + { short offcodeNCount[MaxOff+1]; + unsigned offcodeMaxValue = MaxOff, offcodeLog; + size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted, ""); + RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->OFTable, + offcodeNCount, offcodeMaxValue, + OF_base, OF_bits, + offcodeLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */0); + dictPtr += offcodeHeaderSize; + } + + { short matchlengthNCount[MaxML+1]; + unsigned matchlengthMaxValue = MaxML, matchlengthLog; + size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted, ""); + RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->MLTable, + matchlengthNCount, matchlengthMaxValue, + ML_base, ML_bits, + matchlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += matchlengthHeaderSize; + } + + { short litlengthNCount[MaxLL+1]; + unsigned litlengthMaxValue = MaxLL, litlengthLog; + size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, (size_t)(dictEnd-dictPtr)); + RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted, ""); + RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted, ""); + ZSTD_buildFSETable( entropy->LLTable, + litlengthNCount, litlengthMaxValue, + LL_base, LL_bits, + litlengthLog, + entropy->workspace, sizeof(entropy->workspace), + /* bmi2 */ 0); + dictPtr += litlengthHeaderSize; + } + + RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted, ""); + { int i; + size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); + for (i=0; i<3; i++) { + U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; + RETURN_ERROR_IF(rep==0 || rep > dictContentSize, + dictionary_corrupted, ""); + entropy->rep[i] = rep; + } } + + return (size_t)(dictPtr - (const BYTE*)dict); +} + +static size_t ZSTD_decompress_insertDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + if (dictSize < 8) return ZSTD_refDictContent(dctx, dict, dictSize); + { U32 const magic = MEM_readLE32(dict); + if (magic != ZSTD_MAGIC_DICTIONARY) { + return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */ + } } + dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); + + /* load entropy tables */ + { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); + RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted, ""); + dict = (const char*)dict + eSize; + dictSize -= eSize; + } + dctx->litEntropy = dctx->fseEntropy = 1; + + /* reference dictionary content */ + return ZSTD_refDictContent(dctx, dict, dictSize); +} + +size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx) +{ + assert(dctx != NULL); + dctx->expected = ZSTD_startingInputLength(dctx->format); /* dctx->format must be properly set */ + dctx->stage = ZSTDds_getFrameHeaderSize; + dctx->processedCSize = 0; + dctx->decodedSize = 0; + dctx->previousDstEnd = NULL; + dctx->prefixStart = NULL; + dctx->virtualStart = NULL; + dctx->dictEnd = NULL; + dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ + dctx->litEntropy = dctx->fseEntropy = 0; + dctx->dictID = 0; + dctx->bType = bt_reserved; + ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue)); + ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */ + dctx->LLTptr = dctx->entropy.LLTable; + dctx->MLTptr = dctx->entropy.MLTable; + dctx->OFTptr = dctx->entropy.OFTable; + dctx->HUFptr = dctx->entropy.hufTable; + return 0; +} + +size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (dict && dictSize) + RETURN_ERROR_IF( + ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), + dictionary_corrupted, ""); + return 0; +} + + +/* ====== ZSTD_DDict ====== */ + +size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict"); + assert(dctx != NULL); + if (ddict) { + const char* const dictStart = (const char*)ZSTD_DDict_dictContent(ddict); + size_t const dictSize = ZSTD_DDict_dictSize(ddict); + const void* const dictEnd = dictStart + dictSize; + dctx->ddictIsCold = (dctx->dictEnd != dictEnd); + DEBUGLOG(4, "DDict is %s", + dctx->ddictIsCold ? "~cold~" : "hot!"); + } + FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) , ""); + if (ddict) { /* NULL ddict is equivalent to no dictionary */ + ZSTD_copyDDictParameters(dctx, ddict); + } + return 0; +} + +/*! ZSTD_getDictID_fromDict() : + * Provides the dictID stored within dictionary. + * if @return == 0, the dictionary is not conformant with Zstandard specification. + * It can still be loaded, but as a content-only dictionary. */ +unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) +{ + if (dictSize < 8) return 0; + if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0; + return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE); +} + +/*! ZSTD_getDictID_fromFrame() : + * Provides the dictID required to decompress frame stored within `src`. + * If @return == 0, the dictID could not be decoded. + * This could for one of the following reasons : + * - The frame does not require a dictionary (most common case). + * - The frame was built with dictID intentionally removed. + * Needed dictionary is a hidden information. + * Note : this use case also happens when using a non-conformant dictionary. + * - `srcSize` is too small, and as a result, frame header could not be decoded. + * Note : possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`. + * - This is not a Zstandard frame. + * When identifying the exact failure cause, it's possible to use + * ZSTD_getFrameHeader(), which will provide a more precise error code. */ +unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) +{ + ZSTD_frameHeader zfp = { 0, 0, 0, ZSTD_frame, 0, 0, 0 }; + size_t const hError = ZSTD_getFrameHeader(&zfp, src, srcSize); + if (ZSTD_isError(hError)) return 0; + return zfp.dictID; +} + + +/*! ZSTD_decompress_usingDDict() : +* Decompression using a pre-digested Dictionary +* Use dictionary without significant overhead. */ +size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, + const ZSTD_DDict* ddict) +{ + /* pass content and size in case legacy frames are encountered */ + return ZSTD_decompressMultiFrame(dctx, dst, dstCapacity, src, srcSize, + NULL, 0, + ddict); +} + + +/*===================================== +* Streaming decompression +*====================================*/ + +ZSTD_DStream* ZSTD_createDStream(void) +{ + DEBUGLOG(3, "ZSTD_createDStream"); + return ZSTD_createDStream_advanced(ZSTD_defaultCMem); +} + +ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize) +{ + return ZSTD_initStaticDCtx(workspace, workspaceSize); +} + +ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem) +{ + return ZSTD_createDCtx_advanced(customMem); +} + +size_t ZSTD_freeDStream(ZSTD_DStream* zds) +{ + return ZSTD_freeDCtx(zds); +} + + +/* *** Initialization *** */ + +size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; } +size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; } + +size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, + const void* dict, size_t dictSize, + ZSTD_dictLoadMethod_e dictLoadMethod, + ZSTD_dictContentType_e dictContentType) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (dict && dictSize != 0) { + dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); + RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation, "NULL pointer!"); + dctx->ddict = dctx->ddictLocal; + dctx->dictUses = ZSTD_use_indefinitely; + } + return 0; +} + +size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) +{ + return ZSTD_DCtx_loadDictionary_advanced(dctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto); +} + +size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType), ""); + dctx->dictUses = ZSTD_use_once; + return 0; +} + +size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) +{ + return ZSTD_DCtx_refPrefix_advanced(dctx, prefix, prefixSize, ZSTD_dct_rawContent); +} + + +/* ZSTD_initDStream_usingDict() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) +{ + DEBUGLOG(4, "ZSTD_initDStream_usingDict"); + FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) , ""); + return ZSTD_startingInputLength(zds->format); +} + +/* note : this variant can't fail */ +size_t ZSTD_initDStream(ZSTD_DStream* zds) +{ + DEBUGLOG(4, "ZSTD_initDStream"); + return ZSTD_initDStream_usingDDict(zds, NULL); +} + +/* ZSTD_initDStream_usingDDict() : + * ddict will just be referenced, and must outlive decompression session + * this function cannot fail */ +size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) +{ + FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) , ""); + FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) , ""); + return ZSTD_startingInputLength(dctx->format); +} + +/* ZSTD_resetDStream() : + * return : expected size, aka ZSTD_startingInputLength(). + * this function cannot fail */ +size_t ZSTD_resetDStream(ZSTD_DStream* dctx) +{ + FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only), ""); + return ZSTD_startingInputLength(dctx->format); +} + + +size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + if (ddict) { + dctx->ddict = ddict; + dctx->dictUses = ZSTD_use_indefinitely; + if (dctx->refMultipleDDicts == ZSTD_rmd_refMultipleDDicts) { + if (dctx->ddictSet == NULL) { + dctx->ddictSet = ZSTD_createDDictHashSet(dctx->customMem); + if (!dctx->ddictSet) { + RETURN_ERROR(memory_allocation, "Failed to allocate memory for hash set!"); + } + } + assert(!dctx->staticSize); /* Impossible: ddictSet cannot have been allocated if static dctx */ + FORWARD_IF_ERROR(ZSTD_DDictHashSet_addDDict(dctx->ddictSet, ddict, dctx->customMem), ""); + } + } + return 0; +} + +/* ZSTD_DCtx_setMaxWindowSize() : + * note : no direct equivalence in ZSTD_DCtx_setParameter, + * since this version sets windowSize, and the other sets windowLog */ +size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); + size_t const min = (size_t)1 << bounds.lowerBound; + size_t const max = (size_t)1 << bounds.upperBound; + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound, ""); + RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound, ""); + dctx->maxWindowSize = maxWindowSize; + return 0; +} + +size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format) +{ + return ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, (int)format); +} + +ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam) +{ + ZSTD_bounds bounds = { 0, 0, 0 }; + switch(dParam) { + case ZSTD_d_windowLogMax: + bounds.lowerBound = ZSTD_WINDOWLOG_ABSOLUTEMIN; + bounds.upperBound = ZSTD_WINDOWLOG_MAX; + return bounds; + case ZSTD_d_format: + bounds.lowerBound = (int)ZSTD_f_zstd1; + bounds.upperBound = (int)ZSTD_f_zstd1_magicless; + ZSTD_STATIC_ASSERT(ZSTD_f_zstd1 < ZSTD_f_zstd1_magicless); + return bounds; + case ZSTD_d_stableOutBuffer: + bounds.lowerBound = (int)ZSTD_bm_buffered; + bounds.upperBound = (int)ZSTD_bm_stable; + return bounds; + case ZSTD_d_forceIgnoreChecksum: + bounds.lowerBound = (int)ZSTD_d_validateChecksum; + bounds.upperBound = (int)ZSTD_d_ignoreChecksum; + return bounds; + case ZSTD_d_refMultipleDDicts: + bounds.lowerBound = (int)ZSTD_rmd_refSingleDDict; + bounds.upperBound = (int)ZSTD_rmd_refMultipleDDicts; + return bounds; + default:; + } + bounds.error = ERROR(parameter_unsupported); + return bounds; +} + +/* ZSTD_dParam_withinBounds: + * @return 1 if value is within dParam bounds, + * 0 otherwise */ +static int ZSTD_dParam_withinBounds(ZSTD_dParameter dParam, int value) +{ + ZSTD_bounds const bounds = ZSTD_dParam_getBounds(dParam); + if (ZSTD_isError(bounds.error)) return 0; + if (value < bounds.lowerBound) return 0; + if (value > bounds.upperBound) return 0; + return 1; +} + +#define CHECK_DBOUNDS(p,v) { \ + RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound, ""); \ +} + +size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value) +{ + switch (param) { + case ZSTD_d_windowLogMax: + *value = (int)ZSTD_highbit32((U32)dctx->maxWindowSize); + return 0; + case ZSTD_d_format: + *value = (int)dctx->format; + return 0; + case ZSTD_d_stableOutBuffer: + *value = (int)dctx->outBufferMode; + return 0; + case ZSTD_d_forceIgnoreChecksum: + *value = (int)dctx->forceIgnoreChecksum; + return 0; + case ZSTD_d_refMultipleDDicts: + *value = (int)dctx->refMultipleDDicts; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) +{ + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + switch(dParam) { + case ZSTD_d_windowLogMax: + if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; + CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); + dctx->maxWindowSize = ((size_t)1) << value; + return 0; + case ZSTD_d_format: + CHECK_DBOUNDS(ZSTD_d_format, value); + dctx->format = (ZSTD_format_e)value; + return 0; + case ZSTD_d_stableOutBuffer: + CHECK_DBOUNDS(ZSTD_d_stableOutBuffer, value); + dctx->outBufferMode = (ZSTD_bufferMode_e)value; + return 0; + case ZSTD_d_forceIgnoreChecksum: + CHECK_DBOUNDS(ZSTD_d_forceIgnoreChecksum, value); + dctx->forceIgnoreChecksum = (ZSTD_forceIgnoreChecksum_e)value; + return 0; + case ZSTD_d_refMultipleDDicts: + CHECK_DBOUNDS(ZSTD_d_refMultipleDDicts, value); + if (dctx->staticSize != 0) { + RETURN_ERROR(parameter_unsupported, "Static dctx does not support multiple DDicts!"); + } + dctx->refMultipleDDicts = (ZSTD_refMultipleDDicts_e)value; + return 0; + default:; + } + RETURN_ERROR(parameter_unsupported, ""); +} + +size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) +{ + if ( (reset == ZSTD_reset_session_only) + || (reset == ZSTD_reset_session_and_parameters) ) { + dctx->streamStage = zdss_init; + dctx->noForwardProgress = 0; + } + if ( (reset == ZSTD_reset_parameters) + || (reset == ZSTD_reset_session_and_parameters) ) { + RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong, ""); + ZSTD_clearDict(dctx); + ZSTD_DCtx_resetParameters(dctx); + } + return 0; +} + + +size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx) +{ + return ZSTD_sizeof_DCtx(dctx); +} + +size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize) +{ + size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); + unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); + size_t const minRBSize = (size_t) neededSize; + RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, + frameParameter_windowTooLarge, ""); + return minRBSize; +} + +size_t ZSTD_estimateDStreamSize(size_t windowSize) +{ + size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX); + size_t const inBuffSize = blockSize; /* no block can be larger */ + size_t const outBuffSize = ZSTD_decodingBufferSize_min(windowSize, ZSTD_CONTENTSIZE_UNKNOWN); + return ZSTD_estimateDCtxSize() + inBuffSize + outBuffSize; +} + +size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize) +{ + U32 const windowSizeMax = 1U << ZSTD_WINDOWLOG_MAX; /* note : should be user-selectable, but requires an additional parameter (or a dctx) */ + ZSTD_frameHeader zfh; + size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); + if (ZSTD_isError(err)) return err; + RETURN_ERROR_IF(err>0, srcSize_wrong, ""); + RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, + frameParameter_windowTooLarge, ""); + return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); +} + + +/* ***** Decompression ***** */ + +static int ZSTD_DCtx_isOverflow(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + return (zds->inBuffSize + zds->outBuffSize) >= (neededInBuffSize + neededOutBuffSize) * ZSTD_WORKSPACETOOLARGE_FACTOR; +} + +static void ZSTD_DCtx_updateOversizedDuration(ZSTD_DStream* zds, size_t const neededInBuffSize, size_t const neededOutBuffSize) +{ + if (ZSTD_DCtx_isOverflow(zds, neededInBuffSize, neededOutBuffSize)) + zds->oversizedDuration++; + else + zds->oversizedDuration = 0; +} + +static int ZSTD_DCtx_isOversizedTooLong(ZSTD_DStream* zds) +{ + return zds->oversizedDuration >= ZSTD_WORKSPACETOOLARGE_MAXDURATION; +} + +/* Checks that the output buffer hasn't changed if ZSTD_obm_stable is used. */ +static size_t ZSTD_checkOutBuffer(ZSTD_DStream const* zds, ZSTD_outBuffer const* output) +{ + ZSTD_outBuffer const expect = zds->expectedOutBuffer; + /* No requirement when ZSTD_obm_stable is not enabled. */ + if (zds->outBufferMode != ZSTD_bm_stable) + return 0; + /* Any buffer is allowed in zdss_init, this must be the same for every other call until + * the context is reset. + */ + if (zds->streamStage == zdss_init) + return 0; + /* The buffer must match our expectation exactly. */ + if (expect.dst == output->dst && expect.pos == output->pos && expect.size == output->size) + return 0; + RETURN_ERROR(dstBuffer_wrong, "ZSTD_d_stableOutBuffer enabled but output differs!"); +} + +/* Calls ZSTD_decompressContinue() with the right parameters for ZSTD_decompressStream() + * and updates the stage and the output buffer state. This call is extracted so it can be + * used both when reading directly from the ZSTD_inBuffer, and in buffered input mode. + * NOTE: You must break after calling this function since the streamStage is modified. + */ +static size_t ZSTD_decompressContinueStream( + ZSTD_DStream* zds, char** op, char* oend, + void const* src, size_t srcSize) { + int const isSkipFrame = ZSTD_isSkipFrame(zds); + if (zds->outBufferMode == ZSTD_bm_buffered) { + size_t const dstSize = isSkipFrame ? 0 : zds->outBuffSize - zds->outStart; + size_t const decodedSize = ZSTD_decompressContinue(zds, + zds->outBuff + zds->outStart, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + if (!decodedSize && !isSkipFrame) { + zds->streamStage = zdss_read; + } else { + zds->outEnd = zds->outStart + decodedSize; + zds->streamStage = zdss_flush; + } + } else { + /* Write directly into the output buffer */ + size_t const dstSize = isSkipFrame ? 0 : (size_t)(oend - *op); + size_t const decodedSize = ZSTD_decompressContinue(zds, *op, dstSize, src, srcSize); + FORWARD_IF_ERROR(decodedSize, ""); + *op += decodedSize; + /* Flushing is not needed. */ + zds->streamStage = zdss_read; + assert(*op <= oend); + assert(zds->outBufferMode == ZSTD_bm_stable); + } + return 0; +} + +size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input) +{ + const char* const src = (const char*)input->src; + const char* const istart = input->pos != 0 ? src + input->pos : src; + const char* const iend = input->size != 0 ? src + input->size : src; + const char* ip = istart; + char* const dst = (char*)output->dst; + char* const ostart = output->pos != 0 ? dst + output->pos : dst; + char* const oend = output->size != 0 ? dst + output->size : dst; + char* op = ostart; + U32 someMoreWork = 1; + + DEBUGLOG(5, "ZSTD_decompressStream"); + RETURN_ERROR_IF( + input->pos > input->size, + srcSize_wrong, + "forbidden. in: pos: %u vs size: %u", + (U32)input->pos, (U32)input->size); + RETURN_ERROR_IF( + output->pos > output->size, + dstSize_tooSmall, + "forbidden. out: pos: %u vs size: %u", + (U32)output->pos, (U32)output->size); + DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); + FORWARD_IF_ERROR(ZSTD_checkOutBuffer(zds, output), ""); + + while (someMoreWork) { + switch(zds->streamStage) + { + case zdss_init : + DEBUGLOG(5, "stage zdss_init => transparent reset "); + zds->streamStage = zdss_loadHeader; + zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; + zds->legacyVersion = 0; + zds->hostageByte = 0; + zds->expectedOutBuffer = *output; + /* fall-through */ + + case zdss_loadHeader : + DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); + { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format); + if (zds->refMultipleDDicts && zds->ddictSet) { + ZSTD_DCtx_selectFrameDDict(zds); + } + DEBUGLOG(5, "header size : %u", (U32)hSize); + if (ZSTD_isError(hSize)) { + return hSize; /* error */ + } + if (hSize != 0) { /* need more input */ + size_t const toLoad = hSize - zds->lhSize; /* if hSize!=0, hSize > zds->lhSize */ + size_t const remainingInput = (size_t)(iend-ip); + assert(iend >= ip); + if (toLoad > remainingInput) { /* not enough input to load full header */ + if (remainingInput > 0) { + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, remainingInput); + zds->lhSize += remainingInput; + } + input->pos = input->size; + return (MAX((size_t)ZSTD_FRAMEHEADERSIZE_MIN(zds->format), hSize) - zds->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */ + } + assert(ip != NULL); + ZSTD_memcpy(zds->headerBuffer + zds->lhSize, ip, toLoad); zds->lhSize = hSize; ip += toLoad; + break; + } } + + /* check for single-pass mode opportunity */ + if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && zds->fParams.frameType != ZSTD_skippableFrame + && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) { + size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart)); + if (cSize <= (size_t)(iend-istart)) { + /* shortcut : using single-pass mode */ + size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds)); + if (ZSTD_isError(decompressedSize)) return decompressedSize; + DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") + ip = istart + cSize; + op += decompressedSize; + zds->expected = 0; + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } } + + /* Check output buffer is large enough for ZSTD_odm_stable. */ + if (zds->outBufferMode == ZSTD_bm_stable + && zds->fParams.frameType != ZSTD_skippableFrame + && zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN + && (U64)(size_t)(oend-op) < zds->fParams.frameContentSize) { + RETURN_ERROR(dstSize_tooSmall, "ZSTD_obm_stable passed but ZSTD_outBuffer is too small"); + } + + /* Consume header (see ZSTDds_decodeFrameHeader) */ + DEBUGLOG(4, "Consume header"); + FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), ""); + + if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ + zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); + zds->stage = ZSTDds_skipFrame; + } else { + FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize), ""); + zds->expected = ZSTD_blockHeaderSize; + zds->stage = ZSTDds_decodeBlockHeader; + } + + /* control buffer memory usage */ + DEBUGLOG(4, "Control max memory usage (%u KB <= max %u KB)", + (U32)(zds->fParams.windowSize >>10), + (U32)(zds->maxWindowSize >> 10) ); + zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); + RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, + frameParameter_windowTooLarge, ""); + + /* Adapt buffer sizes to frame header instructions */ + { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); + size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered + ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize) + : 0; + + ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize); + + { int const tooSmall = (zds->inBuffSize < neededInBuffSize) || (zds->outBuffSize < neededOutBuffSize); + int const tooLarge = ZSTD_DCtx_isOversizedTooLong(zds); + + if (tooSmall || tooLarge) { + size_t const bufferSize = neededInBuffSize + neededOutBuffSize; + DEBUGLOG(4, "inBuff : from %u to %u", + (U32)zds->inBuffSize, (U32)neededInBuffSize); + DEBUGLOG(4, "outBuff : from %u to %u", + (U32)zds->outBuffSize, (U32)neededOutBuffSize); + if (zds->staticSize) { /* static DCtx */ + DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); + assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ + RETURN_ERROR_IF( + bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), + memory_allocation, ""); + } else { + ZSTD_customFree(zds->inBuff, zds->customMem); + zds->inBuffSize = 0; + zds->outBuffSize = 0; + zds->inBuff = (char*)ZSTD_customMalloc(bufferSize, zds->customMem); + RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation, ""); + } + zds->inBuffSize = neededInBuffSize; + zds->outBuff = zds->inBuff + zds->inBuffSize; + zds->outBuffSize = neededOutBuffSize; + } } } + zds->streamStage = zdss_read; + /* fall-through */ + + case zdss_read: + DEBUGLOG(5, "stage zdss_read"); + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompressWithInputSize(zds, (size_t)(iend - ip)); + DEBUGLOG(5, "neededInSize = %u", (U32)neededInSize); + if (neededInSize==0) { /* end of frame */ + zds->streamStage = zdss_init; + someMoreWork = 0; + break; + } + if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, ip, neededInSize), ""); + ip += neededInSize; + /* Function modifies the stage so we must break */ + break; + } } + if (ip==iend) { someMoreWork = 0; break; } /* no more input */ + zds->streamStage = zdss_load; + /* fall-through */ + + case zdss_load: + { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zds); + size_t const toLoad = neededInSize - zds->inPos; + int const isSkipFrame = ZSTD_isSkipFrame(zds); + size_t loadedSize; + /* At this point we shouldn't be decompressing a block that we can stream. */ + assert(neededInSize == ZSTD_nextSrcSizeToDecompressWithInputSize(zds, iend - ip)); + if (isSkipFrame) { + loadedSize = MIN(toLoad, (size_t)(iend-ip)); + } else { + RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, + corruption_detected, + "should never happen"); + loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, (size_t)(iend-ip)); + } + ip += loadedSize; + zds->inPos += loadedSize; + if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */ + + /* decode loaded input */ + zds->inPos = 0; /* input is consumed */ + FORWARD_IF_ERROR(ZSTD_decompressContinueStream(zds, &op, oend, zds->inBuff, neededInSize), ""); + /* Function modifies the stage so we must break */ + break; + } + case zdss_flush: + { size_t const toFlushSize = zds->outEnd - zds->outStart; + size_t const flushedSize = ZSTD_limitCopy(op, (size_t)(oend-op), zds->outBuff + zds->outStart, toFlushSize); + op += flushedSize; + zds->outStart += flushedSize; + if (flushedSize == toFlushSize) { /* flush completed */ + zds->streamStage = zdss_read; + if ( (zds->outBuffSize < zds->fParams.frameContentSize) + && (zds->outStart + zds->fParams.blockSizeMax > zds->outBuffSize) ) { + DEBUGLOG(5, "restart filling outBuff from beginning (left:%i, needed:%u)", + (int)(zds->outBuffSize - zds->outStart), + (U32)zds->fParams.blockSizeMax); + zds->outStart = zds->outEnd = 0; + } + break; + } } + /* cannot complete flush */ + someMoreWork = 0; + break; + + default: + assert(0); /* impossible */ + RETURN_ERROR(GENERIC, "impossible to reach"); /* some compiler require default to do something */ + } } + + /* result */ + input->pos = (size_t)(ip - (const char*)(input->src)); + output->pos = (size_t)(op - (char*)(output->dst)); + + /* Update the expected output buffer for ZSTD_obm_stable. */ + zds->expectedOutBuffer = *output; + + if ((ip==istart) && (op==ostart)) { /* no forward progress */ + zds->noForwardProgress ++; + if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { + RETURN_ERROR_IF(op==oend, dstSize_tooSmall, ""); + RETURN_ERROR_IF(ip==iend, srcSize_wrong, ""); + assert(0); + } + } else { + zds->noForwardProgress = 0; + } + { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds); + if (!nextSrcSizeHint) { /* frame fully decoded */ + if (zds->outEnd == zds->outStart) { /* output fully flushed */ + if (zds->hostageByte) { + if (input->pos >= input->size) { + /* can't release hostage (not present) */ + zds->streamStage = zdss_read; + return 1; + } + input->pos++; /* release hostage */ + } /* zds->hostageByte */ + return 0; + } /* zds->outEnd == zds->outStart */ + if (!zds->hostageByte) { /* output not fully flushed; keep last byte as hostage; will be released when all output is flushed */ + input->pos--; /* note : pos > 0, otherwise, impossible to finish reading last block */ + zds->hostageByte=1; + } + return 1; + } /* nextSrcSizeHint==0 */ + nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zds) == ZSTDnit_block); /* preload header of next block */ + assert(zds->inPos <= nextSrcSizeHint); + nextSrcSizeHint -= zds->inPos; /* part already loaded*/ + return nextSrcSizeHint; + } +} + +size_t ZSTD_decompressStream_simpleArgs ( + ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, size_t* dstPos, + const void* src, size_t srcSize, size_t* srcPos) +{ + ZSTD_outBuffer output = { dst, dstCapacity, *dstPos }; + ZSTD_inBuffer input = { src, srcSize, *srcPos }; + /* ZSTD_compress_generic() will check validity of dstPos and srcPos */ + size_t const cErr = ZSTD_decompressStream(dctx, &output, &input); + *dstPos = output.pos; + *srcPos = input.pos; + return cErr; +} diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c new file mode 100644 index 000000000000..cd6eba55a21c --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress_block.c @@ -0,0 +1,1540 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* zstd_decompress_block : + * this module takes care of decompressing _compressed_ block */ + +/*-******************************************************* +* Dependencies +*********************************************************/ +#include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ +#include "../common/compiler.h" /* prefetch */ +#include "../common/cpu.h" /* bmi2 */ +#include "../common/mem.h" /* low level memory routines */ +#define FSE_STATIC_LINKING_ONLY +#include "../common/fse.h" +#define HUF_STATIC_LINKING_ONLY +#include "../common/huf.h" +#include "../common/zstd_internal.h" +#include "zstd_decompress_internal.h" /* ZSTD_DCtx */ +#include "zstd_ddict.h" /* ZSTD_DDictDictContent */ +#include "zstd_decompress_block.h" + +/*_******************************************************* +* Macros +**********************************************************/ + +/* These two optional macros force the use one way or another of the two + * ZSTD_decompressSequences implementations. You can't force in both directions + * at the same time. + */ +#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!" +#endif + + +/*_******************************************************* +* Memory operations +**********************************************************/ +static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); } + + +/*-************************************************************* + * Block decoding + ***************************************************************/ + +/*! ZSTD_getcBlockSize() : + * Provides the size of compressed block from block header `src` */ +size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, + blockProperties_t* bpPtr) +{ + RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, ""); + + { U32 const cBlockHeader = MEM_readLE24(src); + U32 const cSize = cBlockHeader >> 3; + bpPtr->lastBlock = cBlockHeader & 1; + bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); + bpPtr->origSize = cSize; /* only useful for RLE */ + if (bpPtr->blockType == bt_rle) return 1; + RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, ""); + return cSize; + } +} + + +/* Hidden declaration for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize); +/*! ZSTD_decodeLiteralsBlock() : + * @return : nb of bytes read from src (< srcSize ) + * note : symbol not declared but exposed for fullbench */ +size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, + const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ +{ + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); + RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, ""); + + { const BYTE* const istart = (const BYTE*) src; + symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); + + switch(litEncType) + { + case set_repeat: + DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); + RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, ""); + /* fall-through */ + + case set_compressed: + RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); + { size_t lhSize, litSize, litCSize; + U32 singleStream=0; + U32 const lhlCode = (istart[0] >> 2) & 3; + U32 const lhc = MEM_readLE32(istart); + size_t hufSuccess; + switch(lhlCode) + { + case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */ + /* 2 - 2 - 10 - 10 */ + singleStream = !lhlCode; + lhSize = 3; + litSize = (lhc >> 4) & 0x3FF; + litCSize = (lhc >> 14) & 0x3FF; + break; + case 2: + /* 2 - 2 - 14 - 14 */ + lhSize = 4; + litSize = (lhc >> 4) & 0x3FFF; + litCSize = lhc >> 18; + break; + case 3: + /* 2 - 2 - 18 - 18 */ + lhSize = 5; + litSize = (lhc >> 4) & 0x3FFFF; + litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, ""); + + /* prefetch huffman table if cold */ + if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { + PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable)); + } + + if (litEncType==set_repeat) { + if (singleStream) { + hufSuccess = HUF_decompress1X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } else { + hufSuccess = HUF_decompress4X_usingDTable_bmi2( + dctx->litBuffer, litSize, istart+lhSize, litCSize, + dctx->HUFptr, dctx->bmi2); + } + } else { + if (singleStream) { +#if defined(HUF_FORCE_DECOMPRESS_X2) + hufSuccess = HUF_decompress1X_DCtx_wksp( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace)); +#else + hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); +#endif + } else { + hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2( + dctx->entropy.hufTable, dctx->litBuffer, litSize, + istart+lhSize, litCSize, dctx->workspace, + sizeof(dctx->workspace), dctx->bmi2); + } + } + + RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, ""); + + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + dctx->litEntropy = 1; + if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return litCSize + lhSize; + } + + case set_basic: + { size_t litSize, lhSize; + U32 const lhlCode = ((istart[0]) >> 2) & 3; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + break; + } + + if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ + RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, ""); + ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); + return lhSize+litSize; + } + /* direct reference into compressed stream */ + dctx->litPtr = istart+lhSize; + dctx->litSize = litSize; + return lhSize+litSize; + } + + case set_rle: + { U32 const lhlCode = ((istart[0]) >> 2) & 3; + size_t litSize, lhSize; + switch(lhlCode) + { + case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */ + lhSize = 1; + litSize = istart[0] >> 3; + break; + case 1: + lhSize = 2; + litSize = MEM_readLE16(istart) >> 4; + break; + case 3: + lhSize = 3; + litSize = MEM_readLE24(istart) >> 4; + RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); + break; + } + RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, ""); + ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); + dctx->litPtr = dctx->litBuffer; + dctx->litSize = litSize; + return lhSize+1; + } + default: + RETURN_ERROR(corruption_detected, "impossible"); + } + } +} + +/* Default FSE distribution tables. + * These are pre-calculated FSE decoding tables using default distributions as defined in specification : + * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions + * They were generated programmatically with following method : + * - start from default distributions, present in /lib/common/zstd_internal.h + * - generate tables normally, using ZSTD_buildFSETable() + * - printout the content of tables + * - pretify output, report below, test with fuzzer to ensure it's correct */ + +/* Default FSE distribution table for Literal Lengths */ +static const ZSTD_seqSymbol LL_defaultDTable[(1<tableLog = 0; + DTableH->fastMode = 0; + + cell->nbBits = 0; + cell->nextState = 0; + assert(nbAddBits < 255); + cell->nbAdditionalBits = (BYTE)nbAddBits; + cell->baseValue = baseValue; +} + + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * cannot fail if input is valid => + * all inputs are presumed validated at this stage */ +FORCE_INLINE_TEMPLATE +void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize) +{ + ZSTD_seqSymbol* const tableDecode = dt+1; + U32 const maxSV1 = maxSymbolValue + 1; + U32 const tableSize = 1 << tableLog; + + U16* symbolNext = (U16*)wksp; + BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1); + U32 highThreshold = tableSize - 1; + + + /* Sanity Checks */ + assert(maxSymbolValue <= MaxSeq); + assert(tableLog <= MaxFSELog); + assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE); + (void)wkspSize; + /* Init, lay down lowprob symbols */ + { ZSTD_seqSymbol_header DTableH; + DTableH.tableLog = tableLog; + DTableH.fastMode = 1; + { S16 const largeLimit= (S16)(1 << (tableLog-1)); + U32 s; + for (s=0; s= largeLimit) DTableH.fastMode=0; + assert(normalizedCounter[s]>=0); + symbolNext[s] = (U16)normalizedCounter[s]; + } } } + ZSTD_memcpy(dt, &DTableH, sizeof(DTableH)); + } + + /* Spread symbols */ + assert(tableSize <= 512); + /* Specialized symbol spreading for the case when there are + * no low probability (-1 count) symbols. When compressing + * small blocks we avoid low probability symbols to hit this + * case, since header decoding speed matters more. + */ + if (highThreshold == tableSize - 1) { + size_t const tableMask = tableSize-1; + size_t const step = FSE_TABLESTEP(tableSize); + /* First lay down the symbols in order. + * We use a uint64_t to lay down 8 bytes at a time. This reduces branch + * misses since small blocks generally have small table logs, so nearly + * all symbols have counts <= 8. We ensure we have 8 bytes at the end of + * our buffer to handle the over-write. + */ + { + U64 const add = 0x0101010101010101ull; + size_t pos = 0; + U64 sv = 0; + U32 s; + for (s=0; s highThreshold) position = (position + step) & tableMask; /* lowprob area */ + } } + assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ + } + + /* Build Decoding table */ + { + U32 u; + for (u=0; u max, corruption_detected, ""); + { U32 const symbol = *(const BYTE*)src; + U32 const baseline = baseValue[symbol]; + U32 const nbBits = nbAdditionalBits[symbol]; + ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits); + } + *DTablePtr = DTableSpace; + return 1; + case set_basic : + *DTablePtr = defaultTable; + return 0; + case set_repeat: + RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, ""); + /* prefetch FSE table if used */ + if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { + const void* const pStart = *DTablePtr; + size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog)); + PREFETCH_AREA(pStart, pSize); + } + return 0; + case set_compressed : + { unsigned tableLog; + S16 norm[MaxSeq+1]; + size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); + RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, ""); + RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, ""); + ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2); + *DTablePtr = DTableSpace; + return headerSize; + } + default : + assert(0); + RETURN_ERROR(GENERIC, "impossible"); + } +} + +size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr, + const void* src, size_t srcSize) +{ + const BYTE* const istart = (const BYTE*)src; + const BYTE* const iend = istart + srcSize; + const BYTE* ip = istart; + int nbSeq; + DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); + + /* check */ + RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, ""); + + /* SeqHead */ + nbSeq = *ip++; + if (!nbSeq) { + *nbSeqPtr=0; + RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, ""); + return 1; + } + if (nbSeq > 0x7F) { + if (nbSeq == 0xFF) { + RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, ""); + nbSeq = MEM_readLE16(ip) + LONGNBSEQ; + ip+=2; + } else { + RETURN_ERROR_IF(ip >= iend, srcSize_wrong, ""); + nbSeq = ((nbSeq-0x80)<<8) + *ip++; + } + } + *nbSeqPtr = nbSeq; + + /* FSE table descriptors */ + RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */ + { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); + symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); + symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); + ip++; + + /* Build DTables */ + { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr, + LLtype, MaxLL, LLFSELog, + ip, iend-ip, + LL_base, LL_bits, + LL_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += llhSize; + } + + { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr, + OFtype, MaxOff, OffFSELog, + ip, iend-ip, + OF_base, OF_bits, + OF_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += ofhSize; + } + + { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr, + MLtype, MaxML, MLFSELog, + ip, iend-ip, + ML_base, ML_bits, + ML_defaultDTable, dctx->fseEntropy, + dctx->ddictIsCold, nbSeq, + dctx->workspace, sizeof(dctx->workspace), + dctx->bmi2); + RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed"); + ip += mlhSize; + } + } + + return ip-istart; +} + + +typedef struct { + size_t litLength; + size_t matchLength; + size_t offset; + const BYTE* match; +} seq_t; + +typedef struct { + size_t state; + const ZSTD_seqSymbol* table; +} ZSTD_fseState; + +typedef struct { + BIT_DStream_t DStream; + ZSTD_fseState stateLL; + ZSTD_fseState stateOffb; + ZSTD_fseState stateML; + size_t prevOffset[ZSTD_REP_NUM]; + const BYTE* prefixStart; + const BYTE* dictEnd; + size_t pos; +} seqState_t; + +/*! ZSTD_overlapCopy8() : + * Copies 8 bytes from ip to op and updates op and ip where ip <= op. + * If the offset is < 8 then the offset is spread to at least 8 bytes. + * + * Precondition: *ip <= *op + * Postcondition: *op - *op >= 8 + */ +HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) { + assert(*ip <= *op); + if (offset < 8) { + /* close range match, overlap */ + static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ + static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */ + int const sub2 = dec64table[offset]; + (*op)[0] = (*ip)[0]; + (*op)[1] = (*ip)[1]; + (*op)[2] = (*ip)[2]; + (*op)[3] = (*ip)[3]; + *ip += dec32table[offset]; + ZSTD_copy4(*op+4, *ip); + *ip -= sub2; + } else { + ZSTD_copy8(*op, *ip); + } + *ip += 8; + *op += 8; + assert(*op - *ip >= 8); +} + +/*! ZSTD_safecopy() : + * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer + * and write up to 16 bytes past oend_w (op >= oend_w is allowed). + * This function is only called in the uncommon case where the sequence is near the end of the block. It + * should be fast for a single long sequence, but can be slow for several short sequences. + * + * @param ovtype controls the overlap detection + * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart. + * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart. + * The src buffer must be before the dst buffer. + */ +static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) { + ptrdiff_t const diff = op - ip; + BYTE* const oend = op + length; + + assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) || + (ovtype == ZSTD_overlap_src_before_dst && diff >= 0)); + + if (length < 8) { + /* Handle short lengths. */ + while (op < oend) *op++ = *ip++; + return; + } + if (ovtype == ZSTD_overlap_src_before_dst) { + /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */ + assert(length >= 8); + ZSTD_overlapCopy8(&op, &ip, diff); + assert(op - ip >= 8); + assert(op <= oend); + } + + if (oend <= oend_w) { + /* No risk of overwrite. */ + ZSTD_wildcopy(op, ip, length, ovtype); + return; + } + if (op <= oend_w) { + /* Wildcopy until we get close to the end. */ + assert(oend > oend_w); + ZSTD_wildcopy(op, ip, oend_w - op, ovtype); + ip += oend_w - op; + op = oend_w; + } + /* Handle the leftovers. */ + while (op < oend) *op++ = *ip++; +} + +/* ZSTD_execSequenceEnd(): + * This version handles cases that are near the end of the output buffer. It requires + * more careful checks to make sure there is no overflow. By separating out these hard + * and unlikely cases, we can speed up the common cases. + * + * NOTE: This function needs to be fast for a single long sequence, but doesn't need + * to be optimized for many small sequences, since those fall into ZSTD_execSequence(). + */ +FORCE_NOINLINE +size_t ZSTD_execSequenceEnd(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; + + /* bounds checks : careful of address space overflow in 32-bit mode */ + RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer"); + RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer"); + assert(op < op + sequenceLength); + assert(oLitEnd < op + sequenceLength); + + /* copy literals */ + ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap); + op = oLitEnd; + *litPtr = iLitEnd; + + /* copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix */ + RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, ""); + match = dictEnd - (prefixStart-match); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst); + return sequenceLength; +} + +HINT_INLINE +size_t ZSTD_execSequence(BYTE* op, + BYTE* const oend, seq_t sequence, + const BYTE** litPtr, const BYTE* const litLimit, + const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd) +{ + BYTE* const oLitEnd = op + sequence.litLength; + size_t const sequenceLength = sequence.litLength + sequence.matchLength; + BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ + BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */ + const BYTE* const iLitEnd = *litPtr + sequence.litLength; + const BYTE* match = oLitEnd - sequence.offset; + + assert(op != NULL /* Precondition */); + assert(oend_w < oend /* No underflow */); + /* Handle edge cases in a slow path: + * - Read beyond end of literals + * - Match end is within WILDCOPY_OVERLIMIT of oend + * - 32-bit mode and the match length overflows + */ + if (UNLIKELY( + iLitEnd > litLimit || + oMatchEnd > oend_w || + (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH))) + return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); + + /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */ + assert(op <= oLitEnd /* No overflow */); + assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */); + assert(oMatchEnd <= oend /* No underflow */); + assert(iLitEnd <= litLimit /* Literal length is in bounds */); + assert(oLitEnd <= oend_w /* Can wildcopy literals */); + assert(oMatchEnd <= oend_w /* Can wildcopy matches */); + + /* Copy Literals: + * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9. + * We likely don't need the full 32-byte wildcopy. + */ + assert(WILDCOPY_OVERLENGTH >= 16); + ZSTD_copy16(op, (*litPtr)); + if (UNLIKELY(sequence.litLength > 16)) { + ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap); + } + op = oLitEnd; + *litPtr = iLitEnd; /* update for next sequence */ + + /* Copy Match */ + if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { + /* offset beyond prefix -> go into extDict */ + RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, ""); + match = dictEnd + (match - prefixStart); + if (match + sequence.matchLength <= dictEnd) { + ZSTD_memmove(oLitEnd, match, sequence.matchLength); + return sequenceLength; + } + /* span extDict & currentPrefixSegment */ + { size_t const length1 = dictEnd - match; + ZSTD_memmove(oLitEnd, match, length1); + op = oLitEnd + length1; + sequence.matchLength -= length1; + match = prefixStart; + } } + /* Match within prefix of 1 or more bytes */ + assert(op <= oMatchEnd); + assert(oMatchEnd <= oend_w); + assert(match >= prefixStart); + assert(sequence.matchLength >= 1); + + /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy + * without overlap checking. + */ + if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) { + /* We bet on a full wildcopy for matches, since we expect matches to be + * longer than literals (in general). In silesia, ~10% of matches are longer + * than 16 bytes. + */ + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap); + return sequenceLength; + } + assert(sequence.offset < WILDCOPY_VECLEN); + + /* Copy 8 bytes and spread the offset to be >= 8. */ + ZSTD_overlapCopy8(&op, &match, sequence.offset); + + /* If the match length is > 8 bytes, then continue with the wildcopy. */ + if (sequence.matchLength > 8) { + assert(op < oMatchEnd); + ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); + } + return sequenceLength; +} + +static void +ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt) +{ + const void* ptr = dt; + const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr; + DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); + DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits", + (U32)DStatePtr->state, DTableH->tableLog); + BIT_reloadDStream(bitD); + DStatePtr->table = dt + 1; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD) +{ + ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state]; + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +FORCE_INLINE_TEMPLATE void +ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo) +{ + U32 const nbBits = DInfo.nbBits; + size_t const lowBits = BIT_readBits(bitD, nbBits); + DStatePtr->state = DInfo.nextState + lowBits; +} + +/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum + * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) + * bits before reloading. This value is the maximum number of bytes we read + * after reloading when we are decoding long offsets. + */ +#define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ + (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ + ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \ + : 0) + +typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e; +typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e; + +FORCE_INLINE_TEMPLATE seq_t +ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch) +{ + seq_t seq; + ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state]; + ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state]; + ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state]; + U32 const llBase = llDInfo.baseValue; + U32 const mlBase = mlDInfo.baseValue; + U32 const ofBase = ofDInfo.baseValue; + BYTE const llBits = llDInfo.nbAdditionalBits; + BYTE const mlBits = mlDInfo.nbAdditionalBits; + BYTE const ofBits = ofDInfo.nbAdditionalBits; + BYTE const totalBits = llBits+mlBits+ofBits; + + /* sequence */ + { size_t offset; + if (ofBits > 1) { + ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1); + ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5); + assert(ofBits <= MaxOff); + if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) { + U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed); + offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits); + BIT_reloadDStream(&seqState->DStream); + if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits); + assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */ + } else { + offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); + } + seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } else { + U32 const ll0 = (llBase == 0); + if (LIKELY((ofBits == 0))) { + if (LIKELY(!ll0)) + offset = seqState->prevOffset[0]; + else { + offset = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset; + } + } else { + offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1); + { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; + temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ + if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; + seqState->prevOffset[1] = seqState->prevOffset[0]; + seqState->prevOffset[0] = offset = temp; + } } } + seq.offset = offset; + } + + seq.matchLength = mlBase; + if (mlBits > 0) + seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/); + + if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32)) + BIT_reloadDStream(&seqState->DStream); + if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog))) + BIT_reloadDStream(&seqState->DStream); + /* Ensure there are enough bits to read the rest of data in 64-bit mode. */ + ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64); + + seq.litLength = llBase; + if (llBits > 0) + seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/); + + if (MEM_32bits()) + BIT_reloadDStream(&seqState->DStream); + + DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + + if (prefetch == ZSTD_p_prefetch) { + size_t const pos = seqState->pos + seq.litLength; + const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart; + seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted. + * No consequence though : no memory access will occur, offset is only used for prefetching */ + seqState->pos = pos + seq.matchLength; + } + + /* ANS state update + * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo(). + * clang-9.2.0 does 7% worse with ZSTD_updateFseState(). + * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the + * better option, so it is the default for other compilers. But, if you + * measure that it is worse, please put up a pull request. + */ + { +#if !defined(__clang__) + const int kUseUpdateFseState = 1; +#else + const int kUseUpdateFseState = 0; +#endif + if (kUseUpdateFseState) { + ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ + ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ + } else { + ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */ + if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ + ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */ + } + } + + return seq; +} + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION +MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd) +{ + size_t const windowSize = dctx->fParams.windowSize; + /* No dictionary used. */ + if (dctx->dictContentEndForFuzzing == NULL) return 0; + /* Dictionary is our prefix. */ + if (prefixStart == dctx->dictContentBeginForFuzzing) return 1; + /* Dictionary is not our ext-dict. */ + if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0; + /* Dictionary is not within our window size. */ + if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0; + /* Dictionary is active. */ + return 1; +} + +MEM_STATIC void ZSTD_assertValidSequence( + ZSTD_DCtx const* dctx, + BYTE const* op, BYTE const* oend, + seq_t const seq, + BYTE const* prefixStart, BYTE const* virtualStart) +{ +#if DEBUGLEVEL >= 1 + size_t const windowSize = dctx->fParams.windowSize; + size_t const sequenceSize = seq.litLength + seq.matchLength; + BYTE const* const oLitEnd = op + seq.litLength; + DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u", + (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset); + assert(op <= oend); + assert((size_t)(oend - op) >= sequenceSize); + assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX); + if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) { + size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing); + /* Offset must be within the dictionary. */ + assert(seq.offset <= (size_t)(oLitEnd - virtualStart)); + assert(seq.offset <= windowSize + dictSize); + } else { + /* Offset must be within our window. */ + assert(seq.offset <= windowSize); + } +#else + (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart; +#endif +} +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +FORCE_INLINE_TEMPLATE size_t +DONT_VECTORIZE +ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const vBase = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + DEBUGLOG(5, "ZSTD_decompressSequences_body"); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { + seqState_t seqState; + size_t error = 0; + dctx->fseEntropy = 1; + { U32 i; for (i=0; ientropy.rep[i]; } + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + assert(dst != NULL); + + ZSTD_STATIC_ASSERT( + BIT_DStream_unfinished < BIT_DStream_completed && + BIT_DStream_endOfBuffer < BIT_DStream_completed && + BIT_DStream_completed < BIT_DStream_overflow); + +#if defined(__x86_64__) + /* Align the decompression loop to 32 + 16 bytes. + * + * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression + * speed swings based on the alignment of the decompression loop. This + * performance swing is caused by parts of the decompression loop falling + * out of the DSB. The entire decompression loop should fit in the DSB, + * when it can't we get much worse performance. You can measure if you've + * hit the good case or the bad case with this perf command for some + * compressed file test.zst: + * + * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \ + * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst + * + * If you see most cycles served out of the MITE you've hit the bad case. + * If you see most cycles served out of the DSB you've hit the good case. + * If it is pretty even then you may be in an okay case. + * + * I've been able to reproduce this issue on the following CPUs: + * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9 + * Use Instruments->Counters to get DSB/MITE cycles. + * I never got performance swings, but I was able to + * go from the good case of mostly DSB to half of the + * cycles served from MITE. + * - Coffeelake: Intel i9-9900k + * + * I haven't been able to reproduce the instability or DSB misses on any + * of the following CPUS: + * - Haswell + * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH + * - Skylake + * + * If you are seeing performance stability this script can help test. + * It tests on 4 commits in zstd where I saw performance change. + * + * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4 + */ + __asm__(".p2align 5"); + __asm__("nop"); + __asm__(".p2align 4"); +#endif + for ( ; ; ) { + seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch); + size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd); +#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE) + assert(!ZSTD_isError(oneSeqSize)); + if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase); +#endif + DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize); + BIT_reloadDStream(&(seqState.DStream)); + op += oneSeqSize; + /* gcc and clang both don't like early returns in this loop. + * Instead break and check for an error at the end of the loop. + */ + if (UNLIKELY(ZSTD_isError(oneSeqSize))) { + error = oneSeqSize; + break; + } + if (UNLIKELY(!--nbSeq)) break; + } + + /* check if reached exact end */ + DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); + if (ZSTD_isError(error)) return error; + RETURN_ERROR_IF(nbSeq, corruption_detected, ""); + RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, ""); + /* save reps for next block */ + { U32 i; for (i=0; ientropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequences_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +FORCE_INLINE_TEMPLATE size_t +ZSTD_decompressSequencesLong_body( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + const BYTE* ip = (const BYTE*)seqStart; + const BYTE* const iend = ip + seqSize; + BYTE* const ostart = (BYTE*)dst; + BYTE* const oend = ostart + maxDstSize; + BYTE* op = ostart; + const BYTE* litPtr = dctx->litPtr; + const BYTE* const litEnd = litPtr + dctx->litSize; + const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart); + const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart); + const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); + (void)frame; + + /* Regen sequences */ + if (nbSeq) { +#define STORED_SEQS 4 +#define STORED_SEQS_MASK (STORED_SEQS-1) +#define ADVANCED_SEQS 4 + seq_t sequences[STORED_SEQS]; + int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); + seqState_t seqState; + int seqNb; + dctx->fseEntropy = 1; + { int i; for (i=0; ientropy.rep[i]; } + seqState.prefixStart = prefixStart; + seqState.pos = (size_t)(op-prefixStart); + seqState.dictEnd = dictEnd; + assert(dst != NULL); + assert(iend >= ip); + RETURN_ERROR_IF( + ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), + corruption_detected, ""); + ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); + ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); + ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); + + /* prepare in advance */ + for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNbentropy.rep[i] = (U32)(seqState.prevOffset[i]); } + } + + /* last literal segment */ + { size_t const lastLLSize = litEnd - litPtr; + RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, ""); + if (op != NULL) { + ZSTD_memcpy(op, litPtr, lastLLSize); + op += lastLLSize; + } + } + + return op-ostart; +} + +static size_t +ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if DYNAMIC_BMI2 + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static TARGET_ATTRIBUTE("bmi2") size_t +DONT_VECTORIZE +ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +static TARGET_ATTRIBUTE("bmi2") size_t +ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + +#endif /* DYNAMIC_BMI2 */ + +typedef size_t (*ZSTD_decompressSequences_t)( + ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame); + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG +static size_t +ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequences"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */ + + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT +/* ZSTD_decompressSequencesLong() : + * decompression function triggered when a minimum share of offsets is considered "long", + * aka out of cache. + * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". + * This function will try to mitigate main memory latency through the use of prefetching */ +static size_t +ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, + void* dst, size_t maxDstSize, + const void* seqStart, size_t seqSize, int nbSeq, + const ZSTD_longOffset_e isLongOffset, + const int frame) +{ + DEBUGLOG(5, "ZSTD_decompressSequencesLong"); +#if DYNAMIC_BMI2 + if (dctx->bmi2) { + return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); + } +#endif + return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame); +} +#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */ + + + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) +/* ZSTD_getLongOffsetsShare() : + * condition : offTable must be valid + * @return : "share" of long offsets (arbitrarily defined as > (1<<23)) + * compared to maximum possible of (1< 22) total += 1; + } + + assert(tableLog <= OffFSELog); + total <<= (OffFSELog - tableLog); /* scale to OffFSELog */ + + return total; +} +#endif + +size_t +ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame) +{ /* blockType == blockCompressed */ + const BYTE* ip = (const BYTE*)src; + /* isLongOffset must be true if there are long offsets. + * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN. + * We don't expect that to be the case in 64-bit mode. + * In block mode, window size is not known, so we have to be conservative. + * (note: but it could be evaluated from current-lowLimit) + */ + ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); + DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); + + RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, ""); + + /* Decode literals section */ + { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); + DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize); + if (ZSTD_isError(litCSize)) return litCSize; + ip += litCSize; + srcSize -= litCSize; + } + + /* Build Decoding Tables */ + { + /* These macros control at build-time which decompressor implementation + * we use. If neither is defined, we do some inspection and dispatch at + * runtime. + */ +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + int usePrefetchDecoder = dctx->ddictIsCold; +#endif + int nbSeq; + size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize); + if (ZSTD_isError(seqHSize)) return seqHSize; + ip += seqHSize; + srcSize -= seqHSize; + + RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled"); + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if ( !usePrefetchDecoder + && (!frame || (dctx->fParams.windowSize > (1<<24))) + && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */ + U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr); + U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */ + usePrefetchDecoder = (shareLongOffsets >= minShare); + } +#endif + + dctx->ddictIsCold = 0; + +#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \ + !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG) + if (usePrefetchDecoder) +#endif +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT + return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + +#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG + /* else */ + return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame); +#endif + } +} + + +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize) +{ + if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */ + dctx->dictEnd = dctx->previousDstEnd; + dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart)); + dctx->prefixStart = dst; + dctx->previousDstEnd = dst; + } +} + + +size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize) +{ + size_t dSize; + ZSTD_checkContinuity(dctx, dst, dstCapacity); + dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0); + dctx->previousDstEnd = (char*)dst + dSize; + return dSize; +} diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h new file mode 100644 index 000000000000..e7f5f6689459 --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress_block.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +#ifndef ZSTD_DEC_BLOCK_H +#define ZSTD_DEC_BLOCK_H + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/zstd_deps.h" /* size_t */ +#include /* DCtx, and some public functions */ +#include "../common/zstd_internal.h" /* blockProperties_t, and some public functions */ +#include "zstd_decompress_internal.h" /* ZSTD_seqSymbol */ + + +/* === Prototypes === */ + +/* note: prototypes already published within `zstd.h` : + * ZSTD_decompressBlock() + */ + +/* note: prototypes already published within `zstd_internal.h` : + * ZSTD_getcBlockSize() + * ZSTD_decodeSeqHeaders() + */ + + +/* ZSTD_decompressBlock_internal() : + * decompress block, starting at `src`, + * into destination buffer `dst`. + * @return : decompressed block size, + * or an error code (which can be tested using ZSTD_isError()) + */ +size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx, + void* dst, size_t dstCapacity, + const void* src, size_t srcSize, const int frame); + +/* ZSTD_buildFSETable() : + * generate FSE decoding table for one symbol (ll, ml or off) + * this function must be called with valid parameters only + * (dt is large enough, normalizedCounter distribution total is a power of 2, max is within range, etc.) + * in which case it cannot fail. + * The workspace must be 4-byte aligned and at least ZSTD_BUILD_FSE_TABLE_WKSP_SIZE bytes, which is + * defined in zstd_decompress_internal.h. + * Internal use only. + */ +void ZSTD_buildFSETable(ZSTD_seqSymbol* dt, + const short* normalizedCounter, unsigned maxSymbolValue, + const U32* baseValue, const U32* nbAdditionalBits, + unsigned tableLog, void* wksp, size_t wkspSize, + int bmi2); + + +#endif /* ZSTD_DEC_BLOCK_H */ diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h new file mode 100644 index 000000000000..4b9052f68755 --- /dev/null +++ b/lib/zstd/decompress/zstd_decompress_internal.h @@ -0,0 +1,202 @@ +/* + * Copyright (c) Yann Collet, Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + + +/* zstd_decompress_internal: + * objects and definitions shared within lib/decompress modules */ + + #ifndef ZSTD_DECOMPRESS_INTERNAL_H + #define ZSTD_DECOMPRESS_INTERNAL_H + + +/*-******************************************************* + * Dependencies + *********************************************************/ +#include "../common/mem.h" /* BYTE, U16, U32 */ +#include "../common/zstd_internal.h" /* ZSTD_seqSymbol */ + + + +/*-******************************************************* + * Constants + *********************************************************/ +static UNUSED_ATTR const U32 LL_base[MaxLL+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 18, 20, 22, 24, 28, 32, 40, + 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, + 0x2000, 0x4000, 0x8000, 0x10000 }; + +static UNUSED_ATTR const U32 OF_base[MaxOff+1] = { + 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, + 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, + 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, + 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD }; + +static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 }; + +static UNUSED_ATTR const U32 ML_base[MaxML+1] = { + 3, 4, 5, 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, 16, 17, 18, + 19, 20, 21, 22, 23, 24, 25, 26, + 27, 28, 29, 30, 31, 32, 33, 34, + 35, 37, 39, 41, 43, 47, 51, 59, + 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, + 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; + + +/*-******************************************************* + * Decompression types + *********************************************************/ + typedef struct { + U32 fastMode; + U32 tableLog; + } ZSTD_seqSymbol_header; + + typedef struct { + U16 nextState; + BYTE nbAdditionalBits; + BYTE nbBits; + U32 baseValue; + } ZSTD_seqSymbol; + + #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log))) + +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE (sizeof(S16) * (MaxSeq + 1) + (1u << MaxFSELog) + sizeof(U64)) +#define ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32 ((ZSTD_BUILD_FSE_TABLE_WKSP_SIZE + sizeof(U32) - 1) / sizeof(U32)) + +typedef struct { + ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */ + ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */ + ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */ + HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */ + U32 rep[ZSTD_REP_NUM]; + U32 workspace[ZSTD_BUILD_FSE_TABLE_WKSP_SIZE_U32]; +} ZSTD_entropyDTables_t; + +typedef enum { ZSTDds_getFrameHeaderSize, ZSTDds_decodeFrameHeader, + ZSTDds_decodeBlockHeader, ZSTDds_decompressBlock, + ZSTDds_decompressLastBlock, ZSTDds_checkChecksum, + ZSTDds_decodeSkippableHeader, ZSTDds_skipFrame } ZSTD_dStage; + +typedef enum { zdss_init=0, zdss_loadHeader, + zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; + +typedef enum { + ZSTD_use_indefinitely = -1, /* Use the dictionary indefinitely */ + ZSTD_dont_use = 0, /* Do not use the dictionary (if one exists free it) */ + ZSTD_use_once = 1 /* Use the dictionary once and set to ZSTD_dont_use */ +} ZSTD_dictUses_e; + +/* Hashset for storing references to multiple ZSTD_DDict within ZSTD_DCtx */ +typedef struct { + const ZSTD_DDict** ddictPtrTable; + size_t ddictPtrTableSize; + size_t ddictPtrCount; +} ZSTD_DDictHashSet; + +struct ZSTD_DCtx_s +{ + const ZSTD_seqSymbol* LLTptr; + const ZSTD_seqSymbol* MLTptr; + const ZSTD_seqSymbol* OFTptr; + const HUF_DTable* HUFptr; + ZSTD_entropyDTables_t entropy; + U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */ + const void* previousDstEnd; /* detect continuity */ + const void* prefixStart; /* start of current segment */ + const void* virtualStart; /* virtual start of previous segment if it was just before current one */ + const void* dictEnd; /* end of previous segment */ + size_t expected; + ZSTD_frameHeader fParams; + U64 processedCSize; + U64 decodedSize; + blockType_e bType; /* used in ZSTD_decompressContinue(), store blockType between block header decoding and block decompression stages */ + ZSTD_dStage stage; + U32 litEntropy; + U32 fseEntropy; + struct xxh64_state xxhState; + size_t headerSize; + ZSTD_format_e format; + ZSTD_forceIgnoreChecksum_e forceIgnoreChecksum; /* User specified: if == 1, will ignore checksums in compressed frame. Default == 0 */ + U32 validateChecksum; /* if == 1, will validate checksum. Is == 1 if (fParams.checksumFlag == 1) and (forceIgnoreChecksum == 0). */ + const BYTE* litPtr; + ZSTD_customMem customMem; + size_t litSize; + size_t rleSize; + size_t staticSize; + int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */ + + /* dictionary */ + ZSTD_DDict* ddictLocal; + const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ + U32 dictID; + int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ + ZSTD_dictUses_e dictUses; + ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */ + ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */ + + /* streaming */ + ZSTD_dStreamStage streamStage; + char* inBuff; + size_t inBuffSize; + size_t inPos; + size_t maxWindowSize; + char* outBuff; + size_t outBuffSize; + size_t outStart; + size_t outEnd; + size_t lhSize; + void* legacyContext; + U32 previousLegacyVersion; + U32 legacyVersion; + U32 hostageByte; + int noForwardProgress; + ZSTD_bufferMode_e outBufferMode; + ZSTD_outBuffer expectedOutBuffer; + + /* workspace */ + BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH]; + BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX]; + + size_t oversizedDuration; + +#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION + void const* dictContentBeginForFuzzing; + void const* dictContentEndForFuzzing; +#endif + + /* Tracing */ +}; /* typedef'd to ZSTD_DCtx within "zstd.h" */ + + +/*-******************************************************* + * Shared internal functions + *********************************************************/ + +/*! ZSTD_loadDEntropy() : + * dict : must point at beginning of a valid zstd dictionary. + * @return : size of dictionary header (size of magic number + dict ID + entropy tables) */ +size_t ZSTD_loadDEntropy(ZSTD_entropyDTables_t* entropy, + const void* const dict, size_t const dictSize); + +/*! ZSTD_checkContinuity() : + * check if next `dst` follows previous position, where decompression ended. + * If yes, do nothing (continue on current segment). + * If not, classify previous segment as "external dictionary", and start a new segment. + * This function cannot fail. */ +void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize); + + +#endif /* ZSTD_DECOMPRESS_INTERNAL_H */ diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h new file mode 100644 index 000000000000..f35bef03eb22 --- /dev/null +++ b/lib/zstd/decompress_sources.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +/* + * This file includes every .c file needed for decompression. + * It is used by lib/decompress_unzstd.c to include the decompression + * source into the translation-unit, so it can be used for kernel + * decompression. + */ + +#include "common/debug.c" +#include "common/entropy_common.c" +#include "common/error_private.c" +#include "common/fse_decompress.c" +#include "common/zstd_common.c" +#include "decompress/huf_decompress.c" +#include "decompress/zstd_ddict.c" +#include "decompress/zstd_decompress.c" +#include "decompress/zstd_decompress_block.c" +#include "zstd_decompress_module.c" diff --git a/lib/zstd/entropy_common.c b/lib/zstd/entropy_common.c deleted file mode 100644 index 2b0a643c32c4..000000000000 --- a/lib/zstd/entropy_common.c +++ /dev/null @@ -1,243 +0,0 @@ -/* - * Common functions of New Generation Entropy library - * Copyright (C) 2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************* -* Dependencies -***************************************/ -#include "error_private.h" /* ERR_*, ERROR */ -#include "fse.h" -#include "huf.h" -#include "mem.h" - -/*=== Version ===*/ -unsigned FSE_versionNumber(void) { return FSE_VERSION_NUMBER; } - -/*=== Error Management ===*/ -unsigned FSE_isError(size_t code) { return ERR_isError(code); } - -unsigned HUF_isError(size_t code) { return ERR_isError(code); } - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSVPtr, unsigned *tableLogPtr, const void *headerBuffer, size_t hbSize) -{ - const BYTE *const istart = (const BYTE *)headerBuffer; - const BYTE *const iend = istart + hbSize; - const BYTE *ip = istart; - int nbBits; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - if (hbSize < 4) - return ERROR(srcSize_wrong); - bitStream = ZSTD_readLE32(ip); - nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */ - if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) - return ERROR(tableLog_tooLarge); - bitStream >>= 4; - bitCount = 4; - *tableLogPtr = nbBits; - remaining = (1 << nbBits) + 1; - threshold = 1 << nbBits; - nbBits++; - - while ((remaining > 1) & (charnum <= *maxSVPtr)) { - if (previous0) { - unsigned n0 = charnum; - while ((bitStream & 0xFFFF) == 0xFFFF) { - n0 += 24; - if (ip < iend - 5) { - ip += 2; - bitStream = ZSTD_readLE32(ip) >> bitCount; - } else { - bitStream >>= 16; - bitCount += 16; - } - } - while ((bitStream & 3) == 3) { - n0 += 3; - bitStream >>= 2; - bitCount += 2; - } - n0 += bitStream & 3; - bitCount += 2; - if (n0 > *maxSVPtr) - return ERROR(maxSymbolValue_tooSmall); - while (charnum < n0) - normalizedCounter[charnum++] = 0; - if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { - ip += bitCount >> 3; - bitCount &= 7; - bitStream = ZSTD_readLE32(ip) >> bitCount; - } else { - bitStream >>= 2; - } - } - { - int const max = (2 * threshold - 1) - remaining; - int count; - - if ((bitStream & (threshold - 1)) < (U32)max) { - count = bitStream & (threshold - 1); - bitCount += nbBits - 1; - } else { - count = bitStream & (2 * threshold - 1); - if (count >= threshold) - count -= max; - bitCount += nbBits; - } - - count--; /* extra accuracy */ - remaining -= count < 0 ? -count : count; /* -1 means +1 */ - normalizedCounter[charnum++] = (short)count; - previous0 = !count; - while (remaining < threshold) { - nbBits--; - threshold >>= 1; - } - - if ((ip <= iend - 7) || (ip + (bitCount >> 3) <= iend - 4)) { - ip += bitCount >> 3; - bitCount &= 7; - } else { - bitCount -= (int)(8 * (iend - 4 - ip)); - ip = iend - 4; - } - bitStream = ZSTD_readLE32(ip) >> (bitCount & 31); - } - } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */ - if (remaining != 1) - return ERROR(corruption_detected); - if (bitCount > 32) - return ERROR(corruption_detected); - *maxSVPtr = charnum - 1; - - ip += (bitCount + 7) >> 3; - return ip - istart; -} - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableX?() . -*/ -size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 weightTotal; - const BYTE *ip = (const BYTE *)src; - size_t iSize; - size_t oSize; - - if (!srcSize) - return ERROR(srcSize_wrong); - iSize = ip[0]; - /* memset(huffWeight, 0, hwSize); */ /* is not necessary, even though some analyzer complain ... */ - - if (iSize >= 128) { /* special header */ - oSize = iSize - 127; - iSize = ((oSize + 1) / 2); - if (iSize + 1 > srcSize) - return ERROR(srcSize_wrong); - if (oSize >= hwSize) - return ERROR(corruption_detected); - ip += 1; - { - U32 n; - for (n = 0; n < oSize; n += 2) { - huffWeight[n] = ip[n / 2] >> 4; - huffWeight[n + 1] = ip[n / 2] & 15; - } - } - } else { /* header compressed with FSE (normal case) */ - if (iSize + 1 > srcSize) - return ERROR(srcSize_wrong); - oSize = FSE_decompress_wksp(huffWeight, hwSize - 1, ip + 1, iSize, 6, workspace, workspaceSize); /* max (hwSize-1) values decoded, as last one is implied */ - if (FSE_isError(oSize)) - return oSize; - } - - /* collect weight stats */ - memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32)); - weightTotal = 0; - { - U32 n; - for (n = 0; n < oSize; n++) { - if (huffWeight[n] >= HUF_TABLELOG_MAX) - return ERROR(corruption_detected); - rankStats[huffWeight[n]]++; - weightTotal += (1 << huffWeight[n]) >> 1; - } - } - if (weightTotal == 0) - return ERROR(corruption_detected); - - /* get last non-null symbol weight (implied, total must be 2^n) */ - { - U32 const tableLog = BIT_highbit32(weightTotal) + 1; - if (tableLog > HUF_TABLELOG_MAX) - return ERROR(corruption_detected); - *tableLogPtr = tableLog; - /* determine last weight */ - { - U32 const total = 1 << tableLog; - U32 const rest = total - weightTotal; - U32 const verif = 1 << BIT_highbit32(rest); - U32 const lastWeight = BIT_highbit32(rest) + 1; - if (verif != rest) - return ERROR(corruption_detected); /* last value must be a clean power of 2 */ - huffWeight[oSize] = (BYTE)lastWeight; - rankStats[lastWeight]++; - } - } - - /* check tree construction validity */ - if ((rankStats[1] < 2) || (rankStats[1] & 1)) - return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */ - - /* results */ - *nbSymbolsPtr = (U32)(oSize + 1); - return iSize + 1; -} diff --git a/lib/zstd/error_private.h b/lib/zstd/error_private.h deleted file mode 100644 index 1a60b31f706c..000000000000 --- a/lib/zstd/error_private.h +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/* Note : this module is expected to remain private, do not expose it */ - -#ifndef ERROR_H_MODULE -#define ERROR_H_MODULE - -/* **************************************** -* Dependencies -******************************************/ -#include /* size_t */ -#include /* enum list */ - -/* **************************************** -* Compiler-specific -******************************************/ -#define ERR_STATIC static __attribute__((unused)) - -/*-**************************************** -* Customization (error_public.h) -******************************************/ -typedef ZSTD_ErrorCode ERR_enum; -#define PREFIX(name) ZSTD_error_##name - -/*-**************************************** -* Error codes handling -******************************************/ -#define ERROR(name) ((size_t)-PREFIX(name)) - -ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); } - -ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) -{ - if (!ERR_isError(code)) - return (ERR_enum)0; - return (ERR_enum)(0 - code); -} - -#endif /* ERROR_H_MODULE */ diff --git a/lib/zstd/fse.h b/lib/zstd/fse.h deleted file mode 100644 index 7460ab04b191..000000000000 --- a/lib/zstd/fse.h +++ /dev/null @@ -1,575 +0,0 @@ -/* - * FSE : Finite State Entropy codec - * Public Prototypes declaration - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ -#ifndef FSE_H -#define FSE_H - -/*-***************************************** -* Dependencies -******************************************/ -#include /* size_t, ptrdiff_t */ - -/*-***************************************** -* FSE_PUBLIC_API : control library symbols visibility -******************************************/ -#define FSE_PUBLIC_API - -/*------ Version ------*/ -#define FSE_VERSION_MAJOR 0 -#define FSE_VERSION_MINOR 9 -#define FSE_VERSION_RELEASE 0 - -#define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE -#define FSE_QUOTE(str) #str -#define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str) -#define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION) - -#define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR * 100 * 100 + FSE_VERSION_MINOR * 100 + FSE_VERSION_RELEASE) -FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */ - -/*-***************************************** -* Tool functions -******************************************/ -FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */ - -/* Error Management */ -FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */ - -/*-***************************************** -* FSE detailed API -******************************************/ -/*! -FSE_compress() does the following: -1. count symbol occurrence from source[] into table count[] -2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog) -3. save normalized counters to memory buffer using writeNCount() -4. build encoding table 'CTable' from normalized counters -5. encode the data stream using encoding table 'CTable' - -FSE_decompress() does the following: -1. read normalized counters with readNCount() -2. build decoding table 'DTable' from normalized counters -3. decode the data stream using decoding table 'DTable' - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and provide normalized distribution using external method. -*/ - -/* *** COMPRESSION *** */ -/*! FSE_optimalTableLog(): - dynamically downsize 'tableLog' when conditions are met. - It saves CPU time, by using smaller tables, while preserving or even improving compression ratio. - @return : recommended tableLog (necessarily <= 'maxTableLog') */ -FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_normalizeCount(): - normalize counts so that sum(count[]) == Power_of_2 (2^tableLog) - 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1). - @return : tableLog, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t srcSize, unsigned maxSymbolValue); - -/*! FSE_NCountWriteBound(): - Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'. - Typically useful for allocation purpose. */ -FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog); - -/*! FSE_writeNCount(): - Compactly save 'normalizedCounter' into 'buffer'. - @return : size of the compressed table, - or an errorCode, which can be tested using FSE_isError(). */ -FSE_PUBLIC_API size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog); - -/*! Constructor and Destructor of FSE_CTable. - Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */ -typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */ - -/*! FSE_compress_usingCTable(): - Compress `src` using `ct` into `dst` which must be already allocated. - @return : size of compressed data (<= `dstCapacity`), - or 0 if compressed data could not fit into `dst`, - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_compress_usingCTable(void *dst, size_t dstCapacity, const void *src, size_t srcSize, const FSE_CTable *ct); - -/*! -Tutorial : ----------- -The first step is to count all symbols. FSE_count() does this job very fast. -Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells. -'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0] -maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value) -FSE_count() will return the number of occurrence of the most frequent symbol. -This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). - -The next step is to normalize the frequencies. -FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'. -It also guarantees a minimum of 1 to any Symbol with frequency >= 1. -You can use 'tableLog'==0 to mean "use default tableLog value". -If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(), -which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default"). - -The result of FSE_normalizeCount() will be saved into a table, -called 'normalizedCounter', which is a table of signed short. -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells. -The return value is tableLog if everything proceeded as expected. -It is 0 if there is a single symbol within distribution. -If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()). - -'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount(). -'buffer' must be already allocated. -For guaranteed success, buffer size must be at least FSE_headerBound(). -The result of the function is the number of bytes written into 'buffer'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small). - -'normalizedCounter' can then be used to create the compression table 'CTable'. -The space required by 'CTable' must be already allocated, using FSE_createCTable(). -You can then use FSE_buildCTable() to fill 'CTable'. -If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()). - -'CTable' can then be used to compress 'src', with FSE_compress_usingCTable(). -Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize' -The function returns the size of compressed data (without header), necessarily <= `dstCapacity`. -If it returns '0', compressed data could not fit into 'dst'. -If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()). -*/ - -/* *** DECOMPRESSION *** */ - -/*! FSE_readNCount(): - Read compactly saved 'normalizedCounter' from 'rBuffer'. - @return : size read from 'rBuffer', - or an errorCode, which can be tested using FSE_isError(). - maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */ -FSE_PUBLIC_API size_t FSE_readNCount(short *normalizedCounter, unsigned *maxSymbolValuePtr, unsigned *tableLogPtr, const void *rBuffer, size_t rBuffSize); - -/*! Constructor and Destructor of FSE_DTable. - Note that its size depends on 'tableLog' */ -typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */ - -/*! FSE_buildDTable(): - Builds 'dt', which must be already allocated, using FSE_createDTable(). - return : 0, or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize); - -/*! FSE_decompress_usingDTable(): - Decompress compressed source `cSrc` of size `cSrcSize` using `dt` - into `dst` which must be already allocated. - @return : size of regenerated data (necessarily <= `dstCapacity`), - or an errorCode, which can be tested using FSE_isError() */ -FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt); - -/*! -Tutorial : ----------- -(Note : these functions only decompress FSE-compressed blocks. - If block is uncompressed, use memcpy() instead - If block is a single repeated byte, use memset() instead ) - -The first step is to obtain the normalized frequencies of symbols. -This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount(). -'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short. -In practice, that means it's necessary to know 'maxSymbolValue' beforehand, -or size the table to handle worst case situations (typically 256). -FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'. -The result of FSE_readNCount() is the number of bytes read from 'rBuffer'. -Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that. -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'. -This is performed by the function FSE_buildDTable(). -The space required by 'FSE_DTable' must be already allocated using FSE_createDTable(). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). - -`FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable(). -`cSrcSize` must be strictly correct, otherwise decompression will fail. -FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`). -If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small) -*/ - -/* *** Dependency *** */ -#include "bitstream.h" - -/* ***************************************** -* Static allocation -*******************************************/ -/* FSE buffer bounds */ -#define FSE_NCOUNTBOUND 512 -#define FSE_BLOCKBOUND(size) (size + (size >> 7)) -#define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */ -#define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1 << (maxTableLog - 1)) + ((maxSymbolValue + 1) * 2)) -#define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1 << maxTableLog)) - -/* ***************************************** -* FSE advanced API -*******************************************/ -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned - */ -size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace); - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` must be a table of minimum `1024` unsigned - */ -size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize, unsigned *workSpace); - -/*! FSE_count_simple - * Same as FSE_countFast(), but does not use any additional memory (not even on stack). - * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`). -*/ -size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize); - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus); -/**< same as FSE_optimalTableLog(), which used `minus==2` */ - -size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits); -/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */ - -size_t FSE_buildCTable_rle(FSE_CTable *ct, unsigned char symbolValue); -/**< build a fake FSE_CTable, designed to compress always the same symbolValue */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * `wkspSize` must be >= `(1<= BIT_DStream_completed - -When it's done, verify decompression is fully completed, by checking both DStream and the relevant states. -Checking if DStream has reached its end is performed by : - BIT_endOfDStream(&DStream); -Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible. - FSE_endOfDState(&DState); -*/ - -/* ***************************************** -* FSE unsafe API -*******************************************/ -static unsigned char FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD); -/* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */ - -/* ***************************************** -* Implementation of inlined functions -*******************************************/ -typedef struct { - int deltaFindState; - U32 deltaNbBits; -} FSE_symbolCompressionTransform; /* total 8 bytes */ - -ZSTD_STATIC void FSE_initCState(FSE_CState_t *statePtr, const FSE_CTable *ct) -{ - const void *ptr = ct; - const U16 *u16ptr = (const U16 *)ptr; - const U32 tableLog = ZSTD_read16(ptr); - statePtr->value = (ptrdiff_t)1 << tableLog; - statePtr->stateTable = u16ptr + 2; - statePtr->symbolTT = ((const U32 *)ct + 1 + (tableLog ? (1 << (tableLog - 1)) : 1)); - statePtr->stateLog = tableLog; -} - -/*! FSE_initCState2() : -* Same as FSE_initCState(), but the first symbol to include (which will be the last to be read) -* uses the smallest state value possible, saving the cost of this symbol */ -ZSTD_STATIC void FSE_initCState2(FSE_CState_t *statePtr, const FSE_CTable *ct, U32 symbol) -{ - FSE_initCState(statePtr, ct); - { - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; - const U16 *stateTable = (const U16 *)(statePtr->stateTable); - U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1 << 15)) >> 16); - statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits; - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; - } -} - -ZSTD_STATIC void FSE_encodeSymbol(BIT_CStream_t *bitC, FSE_CState_t *statePtr, U32 symbol) -{ - const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform *)(statePtr->symbolTT))[symbol]; - const U16 *const stateTable = (const U16 *)(statePtr->stateTable); - U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16); - BIT_addBits(bitC, statePtr->value, nbBitsOut); - statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState]; -} - -ZSTD_STATIC void FSE_flushCState(BIT_CStream_t *bitC, const FSE_CState_t *statePtr) -{ - BIT_addBits(bitC, statePtr->value, statePtr->stateLog); - BIT_flushBits(bitC); -} - -/* ====== Decompression ====== */ - -typedef struct { - U16 tableLog; - U16 fastMode; -} FSE_DTableHeader; /* sizeof U32 */ - -typedef struct { - unsigned short newState; - unsigned char symbol; - unsigned char nbBits; -} FSE_decode_t; /* size == U32 */ - -ZSTD_STATIC void FSE_initDState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD, const FSE_DTable *dt) -{ - const void *ptr = dt; - const FSE_DTableHeader *const DTableH = (const FSE_DTableHeader *)ptr; - DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog); - BIT_reloadDStream(bitD); - DStatePtr->table = dt + 1; -} - -ZSTD_STATIC BYTE FSE_peekSymbol(const FSE_DState_t *DStatePtr) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - return DInfo.symbol; -} - -ZSTD_STATIC void FSE_updateState(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - size_t const lowBits = BIT_readBits(bitD, nbBits); - DStatePtr->state = DInfo.newState + lowBits; -} - -ZSTD_STATIC BYTE FSE_decodeSymbol(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBits(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -/*! FSE_decodeSymbolFast() : - unsafe, only works if no symbol has a probability > 50% */ -ZSTD_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t *DStatePtr, BIT_DStream_t *bitD) -{ - FSE_decode_t const DInfo = ((const FSE_decode_t *)(DStatePtr->table))[DStatePtr->state]; - U32 const nbBits = DInfo.nbBits; - BYTE const symbol = DInfo.symbol; - size_t const lowBits = BIT_readBitsFast(bitD, nbBits); - - DStatePtr->state = DInfo.newState + lowBits; - return symbol; -} - -ZSTD_STATIC unsigned FSE_endOfDState(const FSE_DState_t *DStatePtr) { return DStatePtr->state == 0; } - -/* ************************************************************** -* Tuning parameters -****************************************************************/ -/*!MEMORY_USAGE : -* Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -* Increasing memory usage improves compression ratio -* Reduced memory usage can improve speed, due to cache effect -* Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ -#ifndef FSE_MAX_MEMORY_USAGE -#define FSE_MAX_MEMORY_USAGE 14 -#endif -#ifndef FSE_DEFAULT_MEMORY_USAGE -#define FSE_DEFAULT_MEMORY_USAGE 13 -#endif - -/*!FSE_MAX_SYMBOL_VALUE : -* Maximum symbol value authorized. -* Required for proper stack allocation */ -#ifndef FSE_MAX_SYMBOL_VALUE -#define FSE_MAX_SYMBOL_VALUE 255 -#endif - -/* ************************************************************** -* template functions type & suffix -****************************************************************/ -#define FSE_FUNCTION_TYPE BYTE -#define FSE_FUNCTION_EXTENSION -#define FSE_DECODE_TYPE FSE_decode_t - -/* *************************************************************** -* Constants -*****************************************************************/ -#define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE - 2) -#define FSE_MAX_TABLESIZE (1U << FSE_MAX_TABLELOG) -#define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE - 1) -#define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE - 2) -#define FSE_MIN_TABLELOG 5 - -#define FSE_TABLELOG_ABSOLUTE_MAX 15 -#if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX -#error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported" -#endif - -#define FSE_TABLESTEP(tableSize) ((tableSize >> 1) + (tableSize >> 3) + 3) - -#endif /* FSE_H */ diff --git a/lib/zstd/fse_compress.c b/lib/zstd/fse_compress.c deleted file mode 100644 index ef3d1741d532..000000000000 --- a/lib/zstd/fse_compress.c +++ /dev/null @@ -1,795 +0,0 @@ -/* - * FSE : Finite State Entropy encoder - * Copyright (C) 2013-2015, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#define FORCE_INLINE static __always_inline - -/* ************************************************************** -* Includes -****************************************************************/ -#include "bitstream.h" -#include "fse.h" -#include -#include -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define FSE_STATIC_ASSERT(c) \ - { \ - enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -#error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -#error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X, Y) X##Y -#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) -#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) - -/* Function templates */ - -/* FSE_buildCTable_wksp() : - * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). - * wkspSize should be sized to handle worst case situation, which is `1<> 1 : 1); - FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); - U32 const step = FSE_TABLESTEP(tableSize); - U32 highThreshold = tableSize - 1; - - U32 *cumul; - FSE_FUNCTION_TYPE *tableSymbol; - size_t spaceUsed32 = 0; - - cumul = (U32 *)workspace + spaceUsed32; - spaceUsed32 += FSE_MAX_SYMBOL_VALUE + 2; - tableSymbol = (FSE_FUNCTION_TYPE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(FSE_FUNCTION_TYPE) * ((size_t)1 << tableLog), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* CTable header */ - tableU16[-2] = (U16)tableLog; - tableU16[-1] = (U16)maxSymbolValue; - - /* For explanations on how to distribute symbol values over the table : - * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */ - - /* symbol start positions */ - { - U32 u; - cumul[0] = 0; - for (u = 1; u <= maxSymbolValue + 1; u++) { - if (normalizedCounter[u - 1] == -1) { /* Low proba symbol */ - cumul[u] = cumul[u - 1] + 1; - tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u - 1); - } else { - cumul[u] = cumul[u - 1] + normalizedCounter[u - 1]; - } - } - cumul[maxSymbolValue + 1] = tableSize + 1; - } - - /* Spread symbols */ - { - U32 position = 0; - U32 symbol; - for (symbol = 0; symbol <= maxSymbolValue; symbol++) { - int nbOccurences; - for (nbOccurences = 0; nbOccurences < normalizedCounter[symbol]; nbOccurences++) { - tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; - position = (position + step) & tableMask; - while (position > highThreshold) - position = (position + step) & tableMask; /* Low proba area */ - } - } - - if (position != 0) - return ERROR(GENERIC); /* Must have gone through all positions */ - } - - /* Build table */ - { - U32 u; - for (u = 0; u < tableSize; u++) { - FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */ - tableU16[cumul[s]++] = (U16)(tableSize + u); /* TableU16 : sorted by symbol order; gives next state value */ - } - } - - /* Build Symbol Transformation Table */ - { - unsigned total = 0; - unsigned s; - for (s = 0; s <= maxSymbolValue; s++) { - switch (normalizedCounter[s]) { - case 0: break; - - case -1: - case 1: - symbolTT[s].deltaNbBits = (tableLog << 16) - (1 << tableLog); - symbolTT[s].deltaFindState = total - 1; - total++; - break; - default: { - U32 const maxBitsOut = tableLog - BIT_highbit32(normalizedCounter[s] - 1); - U32 const minStatePlus = normalizedCounter[s] << maxBitsOut; - symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus; - symbolTT[s].deltaFindState = total - normalizedCounter[s]; - total += normalizedCounter[s]; - } - } - } - } - - return 0; -} - -/*-************************************************************** -* FSE NCount encoding-decoding -****************************************************************/ -size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) -{ - size_t const maxHeaderSize = (((maxSymbolValue + 1) * tableLog) >> 3) + 3; - return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ -} - -static size_t FSE_writeNCount_generic(void *header, size_t headerBufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, - unsigned writeIsSafe) -{ - BYTE *const ostart = (BYTE *)header; - BYTE *out = ostart; - BYTE *const oend = ostart + headerBufferSize; - int nbBits; - const int tableSize = 1 << tableLog; - int remaining; - int threshold; - U32 bitStream; - int bitCount; - unsigned charnum = 0; - int previous0 = 0; - - bitStream = 0; - bitCount = 0; - /* Table Size */ - bitStream += (tableLog - FSE_MIN_TABLELOG) << bitCount; - bitCount += 4; - - /* Init */ - remaining = tableSize + 1; /* +1 for extra accuracy */ - threshold = tableSize; - nbBits = tableLog + 1; - - while (remaining > 1) { /* stops at 1 */ - if (previous0) { - unsigned start = charnum; - while (!normalizedCounter[charnum]) - charnum++; - while (charnum >= start + 24) { - start += 24; - bitStream += 0xFFFFU << bitCount; - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - } - while (charnum >= start + 3) { - start += 3; - bitStream += 3 << bitCount; - bitCount += 2; - } - bitStream += (charnum - start) << bitCount; - bitCount += 2; - if (bitCount > 16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } - } - { - int count = normalizedCounter[charnum++]; - int const max = (2 * threshold - 1) - remaining; - remaining -= count < 0 ? -count : count; - count++; /* +1 for extra accuracy */ - if (count >= threshold) - count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */ - bitStream += count << bitCount; - bitCount += nbBits; - bitCount -= (count < max); - previous0 = (count == 1); - if (remaining < 1) - return ERROR(GENERIC); - while (remaining < threshold) - nbBits--, threshold >>= 1; - } - if (bitCount > 16) { - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += 2; - bitStream >>= 16; - bitCount -= 16; - } - } - - /* flush remaining bitStream */ - if ((!writeIsSafe) && (out > oend - 2)) - return ERROR(dstSize_tooSmall); /* Buffer overflow */ - out[0] = (BYTE)bitStream; - out[1] = (BYTE)(bitStream >> 8); - out += (bitCount + 7) / 8; - - if (charnum > maxSymbolValue + 1) - return ERROR(GENERIC); - - return (out - ostart); -} - -size_t FSE_writeNCount(void *buffer, size_t bufferSize, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); /* Unsupported */ - if (tableLog < FSE_MIN_TABLELOG) - return ERROR(GENERIC); /* Unsupported */ - - if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog)) - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0); - - return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1); -} - -/*-************************************************************** -* Counting histogram -****************************************************************/ -/*! FSE_count_simple - This function counts byte values within `src`, and store the histogram into table `count`. - It doesn't use any additional memory. - But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. - For this reason, prefer using a table `count` with 256 elements. - @return : count of most numerous element -*/ -size_t FSE_count_simple(unsigned *count, unsigned *maxSymbolValuePtr, const void *src, size_t srcSize) -{ - const BYTE *ip = (const BYTE *)src; - const BYTE *const end = ip + srcSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max = 0; - - memset(count, 0, (maxSymbolValue + 1) * sizeof(*count)); - if (srcSize == 0) { - *maxSymbolValuePtr = 0; - return 0; - } - - while (ip < end) - count[*ip++]++; - - while (!count[maxSymbolValue]) - maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - - { - U32 s; - for (s = 0; s <= maxSymbolValue; s++) - if (count[s] > max) - max = count[s]; - } - - return (size_t)max; -} - -/* FSE_count_parallel_wksp() : - * Same as FSE_count_parallel(), but using an externally provided scratch buffer. - * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`` */ -static size_t FSE_count_parallel_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned checkMax, - unsigned *const workSpace) -{ - const BYTE *ip = (const BYTE *)source; - const BYTE *const iend = ip + sourceSize; - unsigned maxSymbolValue = *maxSymbolValuePtr; - unsigned max = 0; - U32 *const Counting1 = workSpace; - U32 *const Counting2 = Counting1 + 256; - U32 *const Counting3 = Counting2 + 256; - U32 *const Counting4 = Counting3 + 256; - - memset(Counting1, 0, 4 * 256 * sizeof(unsigned)); - - /* safety checks */ - if (!sourceSize) { - memset(count, 0, maxSymbolValue + 1); - *maxSymbolValuePtr = 0; - return 0; - } - if (!maxSymbolValue) - maxSymbolValue = 255; /* 0 == default */ - - /* by stripes of 16 bytes */ - { - U32 cached = ZSTD_read32(ip); - ip += 4; - while (ip < iend - 15) { - U32 c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - c = cached; - cached = ZSTD_read32(ip); - ip += 4; - Counting1[(BYTE)c]++; - Counting2[(BYTE)(c >> 8)]++; - Counting3[(BYTE)(c >> 16)]++; - Counting4[c >> 24]++; - } - ip -= 4; - } - - /* finish last symbols */ - while (ip < iend) - Counting1[*ip++]++; - - if (checkMax) { /* verify stats will fit into destination table */ - U32 s; - for (s = 255; s > maxSymbolValue; s--) { - Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s]; - if (Counting1[s]) - return ERROR(maxSymbolValue_tooSmall); - } - } - - { - U32 s; - for (s = 0; s <= maxSymbolValue; s++) { - count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; - if (count[s] > max) - max = count[s]; - } - } - - while (!count[maxSymbolValue]) - maxSymbolValue--; - *maxSymbolValuePtr = maxSymbolValue; - return (size_t)max; -} - -/* FSE_countFast_wksp() : - * Same as FSE_countFast(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_countFast_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) -{ - if (sourceSize < 1500) - return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); -} - -/* FSE_count_wksp() : - * Same as FSE_count(), but using an externally provided scratch buffer. - * `workSpace` size must be table of >= `1024` unsigned */ -size_t FSE_count_wksp(unsigned *count, unsigned *maxSymbolValuePtr, const void *source, size_t sourceSize, unsigned *workSpace) -{ - if (*maxSymbolValuePtr < 255) - return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); - *maxSymbolValuePtr = 255; - return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); -} - -/*-************************************************************** -* FSE Compression Code -****************************************************************/ -/*! FSE_sizeof_CTable() : - FSE_CTable is a variable size structure which contains : - `U16 tableLog;` - `U16 maxSymbolValue;` - `U16 nextStateNumber[1 << tableLog];` // This size is variable - `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable -Allocation is manual (C standard does not support variable-size structures). -*/ -size_t FSE_sizeof_CTable(unsigned maxSymbolValue, unsigned tableLog) -{ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); - return FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue) * sizeof(U32); -} - -/* provides the minimum logSize to safely represent a distribution */ -static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue) -{ - U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1; - U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2; - U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols; - return minBits; -} - -unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus) -{ - U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus; - U32 tableLog = maxTableLog; - U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue); - if (tableLog == 0) - tableLog = FSE_DEFAULT_TABLELOG; - if (maxBitsSrc < tableLog) - tableLog = maxBitsSrc; /* Accuracy can be reduced */ - if (minBits > tableLog) - tableLog = minBits; /* Need a minimum to safely represent all symbol values */ - if (tableLog < FSE_MIN_TABLELOG) - tableLog = FSE_MIN_TABLELOG; - if (tableLog > FSE_MAX_TABLELOG) - tableLog = FSE_MAX_TABLELOG; - return tableLog; -} - -unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2); -} - -/* Secondary normalization method. - To be used when primary method fails. */ - -static size_t FSE_normalizeM2(short *norm, U32 tableLog, const unsigned *count, size_t total, U32 maxSymbolValue) -{ - short const NOT_YET_ASSIGNED = -2; - U32 s; - U32 distributed = 0; - U32 ToDistribute; - - /* Init */ - U32 const lowThreshold = (U32)(total >> tableLog); - U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); - - for (s = 0; s <= maxSymbolValue; s++) { - if (count[s] == 0) { - norm[s] = 0; - continue; - } - if (count[s] <= lowThreshold) { - norm[s] = -1; - distributed++; - total -= count[s]; - continue; - } - if (count[s] <= lowOne) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - - norm[s] = NOT_YET_ASSIGNED; - } - ToDistribute = (1 << tableLog) - distributed; - - if ((total / ToDistribute) > lowOne) { - /* risk of rounding to zero */ - lowOne = (U32)((total * 3) / (ToDistribute * 2)); - for (s = 0; s <= maxSymbolValue; s++) { - if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) { - norm[s] = 1; - distributed++; - total -= count[s]; - continue; - } - } - ToDistribute = (1 << tableLog) - distributed; - } - - if (distributed == maxSymbolValue + 1) { - /* all values are pretty poor; - probably incompressible data (should have already been detected); - find max, then give all remaining points to max */ - U32 maxV = 0, maxC = 0; - for (s = 0; s <= maxSymbolValue; s++) - if (count[s] > maxC) - maxV = s, maxC = count[s]; - norm[maxV] += (short)ToDistribute; - return 0; - } - - if (total == 0) { - /* all of the symbols were low enough for the lowOne or lowThreshold */ - for (s = 0; ToDistribute > 0; s = (s + 1) % (maxSymbolValue + 1)) - if (norm[s] > 0) - ToDistribute--, norm[s]++; - return 0; - } - - { - U64 const vStepLog = 62 - tableLog; - U64 const mid = (1ULL << (vStepLog - 1)) - 1; - U64 const rStep = div_u64((((U64)1 << vStepLog) * ToDistribute) + mid, (U32)total); /* scale on remaining */ - U64 tmpTotal = mid; - for (s = 0; s <= maxSymbolValue; s++) { - if (norm[s] == NOT_YET_ASSIGNED) { - U64 const end = tmpTotal + (count[s] * rStep); - U32 const sStart = (U32)(tmpTotal >> vStepLog); - U32 const sEnd = (U32)(end >> vStepLog); - U32 const weight = sEnd - sStart; - if (weight < 1) - return ERROR(GENERIC); - norm[s] = (short)weight; - tmpTotal = end; - } - } - } - - return 0; -} - -size_t FSE_normalizeCount(short *normalizedCounter, unsigned tableLog, const unsigned *count, size_t total, unsigned maxSymbolValue) -{ - /* Sanity checks */ - if (tableLog == 0) - tableLog = FSE_DEFAULT_TABLELOG; - if (tableLog < FSE_MIN_TABLELOG) - return ERROR(GENERIC); /* Unsupported size */ - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); /* Unsupported size */ - if (tableLog < FSE_minTableLog(total, maxSymbolValue)) - return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ - - { - U32 const rtbTable[] = {0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}; - U64 const scale = 62 - tableLog; - U64 const step = div_u64((U64)1 << 62, (U32)total); /* <== here, one division ! */ - U64 const vStep = 1ULL << (scale - 20); - int stillToDistribute = 1 << tableLog; - unsigned s; - unsigned largest = 0; - short largestP = 0; - U32 lowThreshold = (U32)(total >> tableLog); - - for (s = 0; s <= maxSymbolValue; s++) { - if (count[s] == total) - return 0; /* rle special case */ - if (count[s] == 0) { - normalizedCounter[s] = 0; - continue; - } - if (count[s] <= lowThreshold) { - normalizedCounter[s] = -1; - stillToDistribute--; - } else { - short proba = (short)((count[s] * step) >> scale); - if (proba < 8) { - U64 restToBeat = vStep * rtbTable[proba]; - proba += (count[s] * step) - ((U64)proba << scale) > restToBeat; - } - if (proba > largestP) - largestP = proba, largest = s; - normalizedCounter[s] = proba; - stillToDistribute -= proba; - } - } - if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { - /* corner case, need another normalization method */ - size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); - if (FSE_isError(errorCode)) - return errorCode; - } else - normalizedCounter[largest] += (short)stillToDistribute; - } - - return tableLog; -} - -/* fake FSE_CTable, for raw (uncompressed) input */ -size_t FSE_buildCTable_raw(FSE_CTable *ct, unsigned nbBits) -{ - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSymbolValue = tableMask; - void *const ptr = ct; - U16 *const tableU16 = ((U16 *)ptr) + 2; - void *const FSCT = ((U32 *)ptr) + 1 /* header */ + (tableSize >> 1); /* assumption : tableLog >= 1 */ - FSE_symbolCompressionTransform *const symbolTT = (FSE_symbolCompressionTransform *)(FSCT); - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) - return ERROR(GENERIC); /* min size */ - - /* header */ - tableU16[-2] = (U16)nbBits; - tableU16[-1] = (U16)maxSymbolValue; - - /* Build table */ - for (s = 0; s < tableSize; s++) - tableU16[s] = (U16)(tableSize + s); - - /* Build Symbol Transformation Table */ - { - const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); - for (s = 0; s <= maxSymbolValue; s++) { - symbolTT[s].deltaNbBits = deltaNbBits; - symbolTT[s].deltaFindState = s - 1; - } - } - - return 0; -} - -/* fake FSE_CTable, for rle input (always same symbol) */ -size_t FSE_buildCTable_rle(FSE_CTable *ct, BYTE symbolValue) -{ - void *ptr = ct; - U16 *tableU16 = ((U16 *)ptr) + 2; - void *FSCTptr = (U32 *)ptr + 2; - FSE_symbolCompressionTransform *symbolTT = (FSE_symbolCompressionTransform *)FSCTptr; - - /* header */ - tableU16[-2] = (U16)0; - tableU16[-1] = (U16)symbolValue; - - /* Build table */ - tableU16[0] = 0; - tableU16[1] = 0; /* just in case */ - - /* Build Symbol Transformation Table */ - symbolTT[symbolValue].deltaNbBits = 0; - symbolTT[symbolValue].deltaFindState = 0; - - return 0; -} - -static size_t FSE_compress_usingCTable_generic(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct, const unsigned fast) -{ - const BYTE *const istart = (const BYTE *)src; - const BYTE *const iend = istart + srcSize; - const BYTE *ip = iend; - - BIT_CStream_t bitC; - FSE_CState_t CState1, CState2; - - /* init */ - if (srcSize <= 2) - return 0; - { - size_t const initError = BIT_initCStream(&bitC, dst, dstSize); - if (FSE_isError(initError)) - return 0; /* not enough space available to write a bitstream */ - } - -#define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) - - if (srcSize & 1) { - FSE_initCState2(&CState1, ct, *--ip); - FSE_initCState2(&CState2, ct, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } else { - FSE_initCState2(&CState2, ct, *--ip); - FSE_initCState2(&CState1, ct, *--ip); - } - - /* join to mod 4 */ - srcSize -= 2; - if ((sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) && (srcSize & 2)) { /* test bit 2 */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - FSE_FLUSHBITS(&bitC); - } - - /* 2 or 4 encoding per loop */ - while (ip > istart) { - - FSE_encodeSymbol(&bitC, &CState2, *--ip); - - if (sizeof(bitC.bitContainer) * 8 < FSE_MAX_TABLELOG * 2 + 7) /* this test must be static */ - FSE_FLUSHBITS(&bitC); - - FSE_encodeSymbol(&bitC, &CState1, *--ip); - - if (sizeof(bitC.bitContainer) * 8 > FSE_MAX_TABLELOG * 4 + 7) { /* this test must be static */ - FSE_encodeSymbol(&bitC, &CState2, *--ip); - FSE_encodeSymbol(&bitC, &CState1, *--ip); - } - - FSE_FLUSHBITS(&bitC); - } - - FSE_flushCState(&bitC, &CState2); - FSE_flushCState(&bitC, &CState1); - return BIT_closeCStream(&bitC); -} - -size_t FSE_compress_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const FSE_CTable *ct) -{ - unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); - - if (fast) - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); - else - return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0); -} - -size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } diff --git a/lib/zstd/fse_decompress.c b/lib/zstd/fse_decompress.c deleted file mode 100644 index 0b353530fb3f..000000000000 --- a/lib/zstd/fse_decompress.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - * FSE : Finite State Entropy decoder - * Copyright (C) 2013-2015, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#define FORCE_INLINE static __always_inline - -/* ************************************************************** -* Includes -****************************************************************/ -#include "bitstream.h" -#include "fse.h" -#include "zstd_internal.h" -#include -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define FSE_isError ERR_isError -#define FSE_STATIC_ASSERT(c) \ - { \ - enum { FSE_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - -/* ************************************************************** -* Templates -****************************************************************/ -/* - designed to be included - for type-specific functions (template emulation in C) - Objective is to write these functions only once, for improved maintenance -*/ - -/* safety checks */ -#ifndef FSE_FUNCTION_EXTENSION -#error "FSE_FUNCTION_EXTENSION must be defined" -#endif -#ifndef FSE_FUNCTION_TYPE -#error "FSE_FUNCTION_TYPE must be defined" -#endif - -/* Function names */ -#define FSE_CAT(X, Y) X##Y -#define FSE_FUNCTION_NAME(X, Y) FSE_CAT(X, Y) -#define FSE_TYPE_NAME(X, Y) FSE_CAT(X, Y) - -/* Function templates */ - -size_t FSE_buildDTable_wksp(FSE_DTable *dt, const short *normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void *workspace, size_t workspaceSize) -{ - void *const tdPtr = dt + 1; /* because *dt is unsigned, 32-bits aligned on 32-bits */ - FSE_DECODE_TYPE *const tableDecode = (FSE_DECODE_TYPE *)(tdPtr); - U16 *symbolNext = (U16 *)workspace; - - U32 const maxSV1 = maxSymbolValue + 1; - U32 const tableSize = 1 << tableLog; - U32 highThreshold = tableSize - 1; - - /* Sanity Checks */ - if (workspaceSize < sizeof(U16) * (FSE_MAX_SYMBOL_VALUE + 1)) - return ERROR(tableLog_tooLarge); - if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) - return ERROR(maxSymbolValue_tooLarge); - if (tableLog > FSE_MAX_TABLELOG) - return ERROR(tableLog_tooLarge); - - /* Init, lay down lowprob symbols */ - { - FSE_DTableHeader DTableH; - DTableH.tableLog = (U16)tableLog; - DTableH.fastMode = 1; - { - S16 const largeLimit = (S16)(1 << (tableLog - 1)); - U32 s; - for (s = 0; s < maxSV1; s++) { - if (normalizedCounter[s] == -1) { - tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s; - symbolNext[s] = 1; - } else { - if (normalizedCounter[s] >= largeLimit) - DTableH.fastMode = 0; - symbolNext[s] = normalizedCounter[s]; - } - } - } - memcpy(dt, &DTableH, sizeof(DTableH)); - } - - /* Spread symbols */ - { - U32 const tableMask = tableSize - 1; - U32 const step = FSE_TABLESTEP(tableSize); - U32 s, position = 0; - for (s = 0; s < maxSV1; s++) { - int i; - for (i = 0; i < normalizedCounter[s]; i++) { - tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s; - position = (position + step) & tableMask; - while (position > highThreshold) - position = (position + step) & tableMask; /* lowprob area */ - } - } - if (position != 0) - return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */ - } - - /* Build Decoding table */ - { - U32 u; - for (u = 0; u < tableSize; u++) { - FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol); - U16 nextState = symbolNext[symbol]++; - tableDecode[u].nbBits = (BYTE)(tableLog - BIT_highbit32((U32)nextState)); - tableDecode[u].newState = (U16)((nextState << tableDecode[u].nbBits) - tableSize); - } - } - - return 0; -} - -/*-******************************************************* -* Decompression (Byte symbols) -*********************************************************/ -size_t FSE_buildDTable_rle(FSE_DTable *dt, BYTE symbolValue) -{ - void *ptr = dt; - FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; - void *dPtr = dt + 1; - FSE_decode_t *const cell = (FSE_decode_t *)dPtr; - - DTableH->tableLog = 0; - DTableH->fastMode = 0; - - cell->newState = 0; - cell->symbol = symbolValue; - cell->nbBits = 0; - - return 0; -} - -size_t FSE_buildDTable_raw(FSE_DTable *dt, unsigned nbBits) -{ - void *ptr = dt; - FSE_DTableHeader *const DTableH = (FSE_DTableHeader *)ptr; - void *dPtr = dt + 1; - FSE_decode_t *const dinfo = (FSE_decode_t *)dPtr; - const unsigned tableSize = 1 << nbBits; - const unsigned tableMask = tableSize - 1; - const unsigned maxSV1 = tableMask + 1; - unsigned s; - - /* Sanity checks */ - if (nbBits < 1) - return ERROR(GENERIC); /* min size */ - - /* Build Decoding Table */ - DTableH->tableLog = (U16)nbBits; - DTableH->fastMode = 1; - for (s = 0; s < maxSV1; s++) { - dinfo[s].newState = 0; - dinfo[s].symbol = (BYTE)s; - dinfo[s].nbBits = (BYTE)nbBits; - } - - return 0; -} - -FORCE_INLINE size_t FSE_decompress_usingDTable_generic(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt, - const unsigned fast) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - BYTE *const omax = op + maxDstSize; - BYTE *const olimit = omax - 3; - - BIT_DStream_t bitD; - FSE_DState_t state1; - FSE_DState_t state2; - - /* Init */ - CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize)); - - FSE_initDState(&state1, &bitD, dt); - FSE_initDState(&state2, &bitD, dt); - -#define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD) - - /* 4 symbols per loop */ - for (; (BIT_reloadDStream(&bitD) == BIT_DStream_unfinished) & (op < olimit); op += 4) { - op[0] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[1] = FSE_GETSYMBOL(&state2); - - if (FSE_MAX_TABLELOG * 4 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - { - if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { - op += 2; - break; - } - } - - op[2] = FSE_GETSYMBOL(&state1); - - if (FSE_MAX_TABLELOG * 2 + 7 > sizeof(bitD.bitContainer) * 8) /* This test must be static */ - BIT_reloadDStream(&bitD); - - op[3] = FSE_GETSYMBOL(&state2); - } - - /* tail */ - /* note : BIT_reloadDStream(&bitD) >= FSE_DStream_partiallyFilled; Ends at exactly BIT_DStream_completed */ - while (1) { - if (op > (omax - 2)) - return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state1); - if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state2); - break; - } - - if (op > (omax - 2)) - return ERROR(dstSize_tooSmall); - *op++ = FSE_GETSYMBOL(&state2); - if (BIT_reloadDStream(&bitD) == BIT_DStream_overflow) { - *op++ = FSE_GETSYMBOL(&state1); - break; - } - } - - return op - ostart; -} - -size_t FSE_decompress_usingDTable(void *dst, size_t originalSize, const void *cSrc, size_t cSrcSize, const FSE_DTable *dt) -{ - const void *ptr = dt; - const FSE_DTableHeader *DTableH = (const FSE_DTableHeader *)ptr; - const U32 fastMode = DTableH->fastMode; - - /* select fast mode (static) */ - if (fastMode) - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1); - return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0); -} - -size_t FSE_decompress_wksp(void *dst, size_t dstCapacity, const void *cSrc, size_t cSrcSize, unsigned maxLog, void *workspace, size_t workspaceSize) -{ - const BYTE *const istart = (const BYTE *)cSrc; - const BYTE *ip = istart; - unsigned tableLog; - unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; - size_t NCountLength; - - FSE_DTable *dt; - short *counting; - size_t spaceUsed32 = 0; - - FSE_STATIC_ASSERT(sizeof(FSE_DTable) == sizeof(U32)); - - dt = (FSE_DTable *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += FSE_DTABLE_SIZE_U32(maxLog); - counting = (short *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(short) * (FSE_MAX_SYMBOL_VALUE + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* normal FSE decoding mode */ - NCountLength = FSE_readNCount(counting, &maxSymbolValue, &tableLog, istart, cSrcSize); - if (FSE_isError(NCountLength)) - return NCountLength; - // if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining - // case : NCountLength==cSrcSize */ - if (tableLog > maxLog) - return ERROR(tableLog_tooLarge); - ip += NCountLength; - cSrcSize -= NCountLength; - - CHECK_F(FSE_buildDTable_wksp(dt, counting, maxSymbolValue, tableLog, workspace, workspaceSize)); - - return FSE_decompress_usingDTable(dst, dstCapacity, ip, cSrcSize, dt); /* always return, even if it is an error code */ -} diff --git a/lib/zstd/huf.h b/lib/zstd/huf.h deleted file mode 100644 index 2143da28d952..000000000000 --- a/lib/zstd/huf.h +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Huffman coder, part of New Generation Entropy library - * header file - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ -#ifndef HUF_H_298734234 -#define HUF_H_298734234 - -/* *** Dependencies *** */ -#include /* size_t */ - -/* *** Tool functions *** */ -#define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ -size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ - -/* Error Management */ -unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */ - -/* *** Advanced function *** */ - -/** HUF_compress4X_wksp() : -* Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ -size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -/* *** Dependencies *** */ -#include "mem.h" /* U32 */ - -/* *** Constants *** */ -#define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ -#define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ -#define HUF_SYMBOLVALUE_MAX 255 - -#define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ -#if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX) -#error "HUF_TABLELOG_MAX is too large !" -#endif - -/* **************************************** -* Static allocation -******************************************/ -/* HUF buffer bounds */ -#define HUF_CTABLEBOUND 129 -#define HUF_BLOCKBOUND(size) (size + (size >> 8) + 8) /* only true if incompressible pre-filtered with fast heuristic */ -#define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */ - -/* static allocation of HUF's Compression Table */ -#define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \ - U32 name##hb[maxSymbolValue + 1]; \ - void *name##hv = &(name##hb); \ - HUF_CElt *name = (HUF_CElt *)(name##hv) /* no final ; */ - -/* static allocation of HUF's DTable */ -typedef U32 HUF_DTable; -#define HUF_DTABLE_SIZE(maxTableLog) (1 + (1 << (maxTableLog))) -#define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = {((U32)((maxTableLog)-1) * 0x01000001)} -#define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = {((U32)(maxTableLog)*0x01000001)} - -/* The workspace must have alignment at least 4 and be at least this large */ -#define HUF_COMPRESS_WORKSPACE_SIZE (6 << 10) -#define HUF_COMPRESS_WORKSPACE_SIZE_U32 (HUF_COMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -/* The workspace must have alignment at least 4 and be at least this large */ -#define HUF_DECOMPRESS_WORKSPACE_SIZE (3 << 10) -#define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32)) - -/* **************************************** -* Advanced decompression functions -******************************************/ -size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); /**< decodes RLE and uncompressed */ -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< considers RLE and uncompressed as errors */ -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< single-symbol decoder */ -size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< double-symbols decoder */ - -/* **************************************** -* HUF detailed API -******************************************/ -/*! -HUF_compress() does the following: -1. count symbol occurrence from source[] into table count[] using FSE_count() -2. (optional) refine tableLog using HUF_optimalTableLog() -3. build Huffman table from count using HUF_buildCTable() -4. save Huffman table to memory buffer using HUF_writeCTable_wksp() -5. encode the data stream using HUF_compress4X_usingCTable() - -The following API allows targeting specific sub-functions for advanced tasks. -For example, it's possible to compress several blocks using the same 'CTable', -or to save and regenerate 'CTable' using external methods. -*/ -/* FSE_count() : find it within "fse.h" */ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue); -typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */ -size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, unsigned maxSymbolValue, unsigned huffLog, void *workspace, size_t workspaceSize); -size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); - -typedef enum { - HUF_repeat_none, /**< Cannot use the previous table */ - HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, - 4}X_repeat */ - HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */ -} HUF_repeat; -/** HUF_compress4X_repeat() : -* Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -* If it uses hufTable it does not modify hufTable or repeat. -* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -* If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, - int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. - */ -size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize); - -/*! HUF_readStats() : - Read compact Huffman tree, saved by HUF_writeCTable(). - `huffWeight` is destination buffer. - @return : size read from `src` , or an error Code . - Note : Needed by HUF_readCTable() and HUF_readDTableXn() . */ -size_t HUF_readStats_wksp(BYTE *huffWeight, size_t hwSize, U32 *rankStats, U32 *nbSymbolsPtr, U32 *tableLogPtr, const void *src, size_t srcSize, - void *workspace, size_t workspaceSize); - -/** HUF_readCTable() : -* Loading a CTable saved with HUF_writeCTable() */ -size_t HUF_readCTable_wksp(HUF_CElt *CTable, unsigned maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); - -/* -HUF_decompress() does the following: -1. select the decompression algorithm (X2, X4) based on pre-computed heuristics -2. build Huffman table from save, using HUF_readDTableXn() -3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable -*/ - -/** HUF_selectDecoder() : -* Tells which decoder is likely to decode faster, -* based on a set of pre-determined metrics. -* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize); - -size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); -size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize); - -size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress4X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress4X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); - -/* single stream variants */ - -size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ -size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable); -/** HUF_compress1X_repeat() : -* Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none. -* If it uses hufTable it does not modify hufTable or repeat. -* If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used. -* If preferRepeat then the old table will always be used if valid. */ -size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, - int preferRepeat); /**< `workSpace` must be a table of at least HUF_COMPRESS_WORKSPACE_SIZE_U32 unsigned */ - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize); -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< single-symbol decoder */ -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, - size_t workspaceSize); /**< double-symbols decoder */ - -size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, - const HUF_DTable *DTable); /**< automatic selection of sing or double symbol decoder, based on DTable */ -size_t HUF_decompress1X2_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); -size_t HUF_decompress1X4_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable); - -#endif /* HUF_H_298734234 */ diff --git a/lib/zstd/huf_compress.c b/lib/zstd/huf_compress.c deleted file mode 100644 index fd32838c185f..000000000000 --- a/lib/zstd/huf_compress.c +++ /dev/null @@ -1,773 +0,0 @@ -/* - * Huffman encoder, part of New Generation Entropy library - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Includes -****************************************************************/ -#include "bitstream.h" -#include "fse.h" /* header compression */ -#include "huf.h" -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_STATIC_ASSERT(c) \ - { \ - enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ -#define CHECK_V_F(e, f) \ - size_t const e = f; \ - if (ERR_isError(e)) \ - return f -#define CHECK_F(f) \ - { \ - CHECK_V_F(_var_err__, f); \ - } - -/* ************************************************************** -* Utils -****************************************************************/ -unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue) -{ - return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1); -} - -/* ******************************************************* -* HUF : Huffman block compression -*********************************************************/ -/* HUF_compressWeights() : - * Same as FSE_compress(), but dedicated to huff0's weights compression. - * The use case needs much less stack memory. - * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. - */ -#define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 -size_t HUF_compressWeights_wksp(void *dst, size_t dstSize, const void *weightTable, size_t wtSize, void *workspace, size_t workspaceSize) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *op = ostart; - BYTE *const oend = ostart + dstSize; - - U32 maxSymbolValue = HUF_TABLELOG_MAX; - U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; - - FSE_CTable *CTable; - U32 *count; - S16 *norm; - size_t spaceUsed32 = 0; - - HUF_STATIC_ASSERT(sizeof(FSE_CTable) == sizeof(U32)); - - CTable = (FSE_CTable *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX); - count = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - norm = (S16 *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(S16) * (HUF_TABLELOG_MAX + 1), sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* init conditions */ - if (wtSize <= 1) - return 0; /* Not compressible */ - - /* Scan input and build symbol stats */ - { - CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize)); - if (maxCount == wtSize) - return 1; /* only a single symbol in src : rle */ - if (maxCount == 1) - return 0; /* each symbol present maximum once => not compressible */ - } - - tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); - CHECK_F(FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue)); - - /* Write table description header */ - { - CHECK_V_F(hSize, FSE_writeNCount(op, oend - op, norm, maxSymbolValue, tableLog)); - op += hSize; - } - - /* Compress */ - CHECK_F(FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, workspace, workspaceSize)); - { - CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable)); - if (cSize == 0) - return 0; /* not enough space for compressed data */ - op += cSize; - } - - return op - ostart; -} - -struct HUF_CElt_s { - U16 val; - BYTE nbBits; -}; /* typedef'd to HUF_CElt within "huf.h" */ - -/*! HUF_writeCTable_wksp() : - `CTable` : Huffman tree to save, using huf representation. - @return : size of saved CTable */ -size_t HUF_writeCTable_wksp(void *dst, size_t maxDstSize, const HUF_CElt *CTable, U32 maxSymbolValue, U32 huffLog, void *workspace, size_t workspaceSize) -{ - BYTE *op = (BYTE *)dst; - U32 n; - - BYTE *bitsToWeight; - BYTE *huffWeight; - size_t spaceUsed32 = 0; - - bitsToWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_TABLELOG_MAX + 1, sizeof(U32)) >> 2; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* check conditions */ - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(maxSymbolValue_tooLarge); - - /* convert to weight */ - bitsToWeight[0] = 0; - for (n = 1; n < huffLog + 1; n++) - bitsToWeight[n] = (BYTE)(huffLog + 1 - n); - for (n = 0; n < maxSymbolValue; n++) - huffWeight[n] = bitsToWeight[CTable[n].nbBits]; - - /* attempt weights compression by FSE */ - { - CHECK_V_F(hSize, HUF_compressWeights_wksp(op + 1, maxDstSize - 1, huffWeight, maxSymbolValue, workspace, workspaceSize)); - if ((hSize > 1) & (hSize < maxSymbolValue / 2)) { /* FSE compressed */ - op[0] = (BYTE)hSize; - return hSize + 1; - } - } - - /* write raw values as 4-bits (max : 15) */ - if (maxSymbolValue > (256 - 128)) - return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ - if (((maxSymbolValue + 1) / 2) + 1 > maxDstSize) - return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ - op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue - 1)); - huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ - for (n = 0; n < maxSymbolValue; n += 2) - op[(n / 2) + 1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n + 1]); - return ((maxSymbolValue + 1) / 2) + 1; -} - -size_t HUF_readCTable_wksp(HUF_CElt *CTable, U32 maxSymbolValue, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 *rankVal; - BYTE *huffWeight; - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t readSize; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - /* get symbol weights */ - readSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (ERR_isError(readSize)) - return readSize; - - /* check result */ - if (tableLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - if (nbSymbols > maxSymbolValue + 1) - return ERROR(maxSymbolValue_tooSmall); - - /* Prepare base value per rank */ - { - U32 n, nextRankStart = 0; - for (n = 1; n <= tableLog; n++) { - U32 curr = nextRankStart; - nextRankStart += (rankVal[n] << (n - 1)); - rankVal[n] = curr; - } - } - - /* fill nbBits */ - { - U32 n; - for (n = 0; n < nbSymbols; n++) { - const U32 w = huffWeight[n]; - CTable[n].nbBits = (BYTE)(tableLog + 1 - w); - } - } - - /* fill val */ - { - U16 nbPerRank[HUF_TABLELOG_MAX + 2] = {0}; /* support w=0=>n=tableLog+1 */ - U16 valPerRank[HUF_TABLELOG_MAX + 2] = {0}; - { - U32 n; - for (n = 0; n < nbSymbols; n++) - nbPerRank[CTable[n].nbBits]++; - } - /* determine stating value per rank */ - valPerRank[tableLog + 1] = 0; /* for w==0 */ - { - U16 min = 0; - U32 n; - for (n = tableLog; n > 0; n--) { /* start at n=tablelog <-> w=1 */ - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } - } - /* assign value within rank, symbol order */ - { - U32 n; - for (n = 0; n <= maxSymbolValue; n++) - CTable[n].val = valPerRank[CTable[n].nbBits]++; - } - } - - return readSize; -} - -typedef struct nodeElt_s { - U32 count; - U16 parent; - BYTE byte; - BYTE nbBits; -} nodeElt; - -static U32 HUF_setMaxHeight(nodeElt *huffNode, U32 lastNonNull, U32 maxNbBits) -{ - const U32 largestBits = huffNode[lastNonNull].nbBits; - if (largestBits <= maxNbBits) - return largestBits; /* early exit : no elt > maxNbBits */ - - /* there are several too large elements (at least >= 2) */ - { - int totalCost = 0; - const U32 baseCost = 1 << (largestBits - maxNbBits); - U32 n = lastNonNull; - - while (huffNode[n].nbBits > maxNbBits) { - totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)); - huffNode[n].nbBits = (BYTE)maxNbBits; - n--; - } /* n stops at huffNode[n].nbBits <= maxNbBits */ - while (huffNode[n].nbBits == maxNbBits) - n--; /* n end at index of smallest symbol using < maxNbBits */ - - /* renorm totalCost */ - totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */ - - /* repay normalized cost */ - { - U32 const noSymbol = 0xF0F0F0F0; - U32 rankLast[HUF_TABLELOG_MAX + 2]; - int pos; - - /* Get pos of last (smallest) symbol per rank */ - memset(rankLast, 0xF0, sizeof(rankLast)); - { - U32 currNbBits = maxNbBits; - for (pos = n; pos >= 0; pos--) { - if (huffNode[pos].nbBits >= currNbBits) - continue; - currNbBits = huffNode[pos].nbBits; /* < maxNbBits */ - rankLast[maxNbBits - currNbBits] = pos; - } - } - - while (totalCost > 0) { - U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1; - for (; nBitsToDecrease > 1; nBitsToDecrease--) { - U32 highPos = rankLast[nBitsToDecrease]; - U32 lowPos = rankLast[nBitsToDecrease - 1]; - if (highPos == noSymbol) - continue; - if (lowPos == noSymbol) - break; - { - U32 const highTotal = huffNode[highPos].count; - U32 const lowTotal = 2 * huffNode[lowPos].count; - if (highTotal <= lowTotal) - break; - } - } - /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */ - /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */ - while ((nBitsToDecrease <= HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) - nBitsToDecrease++; - totalCost -= 1 << (nBitsToDecrease - 1); - if (rankLast[nBitsToDecrease - 1] == noSymbol) - rankLast[nBitsToDecrease - 1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */ - huffNode[rankLast[nBitsToDecrease]].nbBits++; - if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */ - rankLast[nBitsToDecrease] = noSymbol; - else { - rankLast[nBitsToDecrease]--; - if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits - nBitsToDecrease) - rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */ - } - } /* while (totalCost > 0) */ - - while (totalCost < 0) { /* Sometimes, cost correction overshoot */ - if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 - (using maxNbBits) */ - while (huffNode[n].nbBits == maxNbBits) - n--; - huffNode[n + 1].nbBits--; - rankLast[1] = n + 1; - totalCost++; - continue; - } - huffNode[rankLast[1] + 1].nbBits--; - rankLast[1]++; - totalCost++; - } - } - } /* there are several too large elements (at least >= 2) */ - - return maxNbBits; -} - -typedef struct { - U32 base; - U32 curr; -} rankPos; - -static void HUF_sort(nodeElt *huffNode, const U32 *count, U32 maxSymbolValue) -{ - rankPos rank[32]; - U32 n; - - memset(rank, 0, sizeof(rank)); - for (n = 0; n <= maxSymbolValue; n++) { - U32 r = BIT_highbit32(count[n] + 1); - rank[r].base++; - } - for (n = 30; n > 0; n--) - rank[n - 1].base += rank[n].base; - for (n = 0; n < 32; n++) - rank[n].curr = rank[n].base; - for (n = 0; n <= maxSymbolValue; n++) { - U32 const c = count[n]; - U32 const r = BIT_highbit32(c + 1) + 1; - U32 pos = rank[r].curr++; - while ((pos > rank[r].base) && (c > huffNode[pos - 1].count)) - huffNode[pos] = huffNode[pos - 1], pos--; - huffNode[pos].count = c; - huffNode[pos].byte = (BYTE)n; - } -} - -/** HUF_buildCTable_wksp() : - * Same as HUF_buildCTable(), but using externally allocated scratch buffer. - * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. - */ -#define STARTNODE (HUF_SYMBOLVALUE_MAX + 1) -typedef nodeElt huffNodeTable[2 * HUF_SYMBOLVALUE_MAX + 1 + 1]; -size_t HUF_buildCTable_wksp(HUF_CElt *tree, const U32 *count, U32 maxSymbolValue, U32 maxNbBits, void *workSpace, size_t wkspSize) -{ - nodeElt *const huffNode0 = (nodeElt *)workSpace; - nodeElt *const huffNode = huffNode0 + 1; - U32 n, nonNullRank; - int lowS, lowN; - U16 nodeNb = STARTNODE; - U32 nodeRoot; - - /* safety checks */ - if (wkspSize < sizeof(huffNodeTable)) - return ERROR(GENERIC); /* workSpace is not large enough */ - if (maxNbBits == 0) - maxNbBits = HUF_TABLELOG_DEFAULT; - if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) - return ERROR(GENERIC); - memset(huffNode0, 0, sizeof(huffNodeTable)); - - /* sort, decreasing order */ - HUF_sort(huffNode, count, maxSymbolValue); - - /* init for parents */ - nonNullRank = maxSymbolValue; - while (huffNode[nonNullRank].count == 0) - nonNullRank--; - lowS = nonNullRank; - nodeRoot = nodeNb + lowS - 1; - lowN = nodeNb; - huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS - 1].count; - huffNode[lowS].parent = huffNode[lowS - 1].parent = nodeNb; - nodeNb++; - lowS -= 2; - for (n = nodeNb; n <= nodeRoot; n++) - huffNode[n].count = (U32)(1U << 30); - huffNode0[0].count = (U32)(1U << 31); /* fake entry, strong barrier */ - - /* create parents */ - while (nodeNb <= nodeRoot) { - U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++; - huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count; - huffNode[n1].parent = huffNode[n2].parent = nodeNb; - nodeNb++; - } - - /* distribute weights (unlimited tree height) */ - huffNode[nodeRoot].nbBits = 0; - for (n = nodeRoot - 1; n >= STARTNODE; n--) - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; - for (n = 0; n <= nonNullRank; n++) - huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1; - - /* enforce maxTableLog */ - maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits); - - /* fill result into tree (val, nbBits) */ - { - U16 nbPerRank[HUF_TABLELOG_MAX + 1] = {0}; - U16 valPerRank[HUF_TABLELOG_MAX + 1] = {0}; - if (maxNbBits > HUF_TABLELOG_MAX) - return ERROR(GENERIC); /* check fit into table */ - for (n = 0; n <= nonNullRank; n++) - nbPerRank[huffNode[n].nbBits]++; - /* determine stating value per rank */ - { - U16 min = 0; - for (n = maxNbBits; n > 0; n--) { - valPerRank[n] = min; /* get starting value within each rank */ - min += nbPerRank[n]; - min >>= 1; - } - } - for (n = 0; n <= maxSymbolValue; n++) - tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */ - for (n = 0; n <= maxSymbolValue; n++) - tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */ - } - - return maxNbBits; -} - -static size_t HUF_estimateCompressedSize(HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) -{ - size_t nbBits = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - nbBits += CTable[s].nbBits * count[s]; - } - return nbBits >> 3; -} - -static int HUF_validateCTable(const HUF_CElt *CTable, const unsigned *count, unsigned maxSymbolValue) -{ - int bad = 0; - int s; - for (s = 0; s <= (int)maxSymbolValue; ++s) { - bad |= (count[s] != 0) & (CTable[s].nbBits == 0); - } - return !bad; -} - -static void HUF_encodeSymbol(BIT_CStream_t *bitCPtr, U32 symbol, const HUF_CElt *CTable) -{ - BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); -} - -size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); } - -#define HUF_FLUSHBITS(s) BIT_flushBits(s) - -#define HUF_FLUSHBITS_1(stream) \ - if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 2 + 7) \ - HUF_FLUSHBITS(stream) - -#define HUF_FLUSHBITS_2(stream) \ - if (sizeof((stream)->bitContainer) * 8 < HUF_TABLELOG_MAX * 4 + 7) \ - HUF_FLUSHBITS(stream) - -size_t HUF_compress1X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) -{ - const BYTE *ip = (const BYTE *)src; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - size_t n; - BIT_CStream_t bitC; - - /* init */ - if (dstSize < 8) - return 0; /* not enough space to compress */ - { - size_t const initErr = BIT_initCStream(&bitC, op, oend - op); - if (HUF_isError(initErr)) - return 0; - } - - n = srcSize & ~3; /* join to mod 4 */ - switch (srcSize & 3) { - case 3: HUF_encodeSymbol(&bitC, ip[n + 2], CTable); HUF_FLUSHBITS_2(&bitC); - fallthrough; - case 2: HUF_encodeSymbol(&bitC, ip[n + 1], CTable); HUF_FLUSHBITS_1(&bitC); - fallthrough; - case 1: HUF_encodeSymbol(&bitC, ip[n + 0], CTable); HUF_FLUSHBITS(&bitC); - fallthrough; - case 0: - default:; - } - - for (; n > 0; n -= 4) { /* note : n&3==0 at this stage */ - HUF_encodeSymbol(&bitC, ip[n - 1], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 2], CTable); - HUF_FLUSHBITS_2(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 3], CTable); - HUF_FLUSHBITS_1(&bitC); - HUF_encodeSymbol(&bitC, ip[n - 4], CTable); - HUF_FLUSHBITS(&bitC); - } - - return BIT_closeCStream(&bitC); -} - -size_t HUF_compress4X_usingCTable(void *dst, size_t dstSize, const void *src, size_t srcSize, const HUF_CElt *CTable) -{ - size_t const segmentSize = (srcSize + 3) / 4; /* first 3 segments */ - const BYTE *ip = (const BYTE *)src; - const BYTE *const iend = ip + srcSize; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - - if (dstSize < 6 + 1 + 1 + 1 + 8) - return 0; /* minimum space to compress successfully */ - if (srcSize < 12) - return 0; /* no saving possible : too small input */ - op += 6; /* jumpTable */ - - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart + 2, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, segmentSize, CTable)); - if (cSize == 0) - return 0; - ZSTD_writeLE16(ostart + 4, (U16)cSize); - op += cSize; - } - - ip += segmentSize; - { - CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend - op, ip, iend - ip, CTable)); - if (cSize == 0) - return 0; - op += cSize; - } - - return op - ostart; -} - -static size_t HUF_compressCTable_internal(BYTE *const ostart, BYTE *op, BYTE *const oend, const void *src, size_t srcSize, unsigned singleStream, - const HUF_CElt *CTable) -{ - size_t const cSize = - singleStream ? HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); - if (HUF_isError(cSize)) { - return cSize; - } - if (cSize == 0) { - return 0; - } /* uncompressible */ - op += cSize; - /* check compressibility */ - if ((size_t)(op - ostart) >= srcSize - 1) { - return 0; - } - return op - ostart; -} - -/* `workSpace` must a table of at least 1024 unsigned */ -static size_t HUF_compress_internal(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, - unsigned singleStream, void *workSpace, size_t wkspSize, HUF_CElt *oldHufTable, HUF_repeat *repeat, int preferRepeat) -{ - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - BYTE *op = ostart; - - U32 *count; - size_t const countSize = sizeof(U32) * (HUF_SYMBOLVALUE_MAX + 1); - HUF_CElt *CTable; - size_t const CTableSize = sizeof(HUF_CElt) * (HUF_SYMBOLVALUE_MAX + 1); - - /* checks & inits */ - if (wkspSize < sizeof(huffNodeTable) + countSize + CTableSize) - return ERROR(GENERIC); - if (!srcSize) - return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ - if (!dstSize) - return 0; /* cannot fit within dst budget */ - if (srcSize > HUF_BLOCKSIZE_MAX) - return ERROR(srcSize_wrong); /* curr block size limit */ - if (huffLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - if (!maxSymbolValue) - maxSymbolValue = HUF_SYMBOLVALUE_MAX; - if (!huffLog) - huffLog = HUF_TABLELOG_DEFAULT; - - count = (U32 *)workSpace; - workSpace = (BYTE *)workSpace + countSize; - wkspSize -= countSize; - CTable = (HUF_CElt *)workSpace; - workSpace = (BYTE *)workSpace + CTableSize; - wkspSize -= CTableSize; - - /* Heuristic : If we don't need to check the validity of the old table use the old table for small inputs */ - if (preferRepeat && repeat && *repeat == HUF_repeat_valid) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - - /* Scan input and build symbol stats */ - { - CHECK_V_F(largest, FSE_count_wksp(count, &maxSymbolValue, (const BYTE *)src, srcSize, (U32 *)workSpace)); - if (largest == srcSize) { - *ostart = ((const BYTE *)src)[0]; - return 1; - } /* single symbol, rle */ - if (largest <= (srcSize >> 7) + 1) - return 0; /* Fast heuristic : not compressible enough */ - } - - /* Check validity of previous table */ - if (repeat && *repeat == HUF_repeat_check && !HUF_validateCTable(oldHufTable, count, maxSymbolValue)) { - *repeat = HUF_repeat_none; - } - /* Heuristic : use existing table for small inputs */ - if (preferRepeat && repeat && *repeat != HUF_repeat_none) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - - /* Build Huffman Tree */ - huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); - { - CHECK_V_F(maxBits, HUF_buildCTable_wksp(CTable, count, maxSymbolValue, huffLog, workSpace, wkspSize)); - huffLog = (U32)maxBits; - /* Zero the unused symbols so we can check it for validity */ - memset(CTable + maxSymbolValue + 1, 0, CTableSize - (maxSymbolValue + 1) * sizeof(HUF_CElt)); - } - - /* Write table description header */ - { - CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, CTable, maxSymbolValue, huffLog, workSpace, wkspSize)); - /* Check if using the previous table will be beneficial */ - if (repeat && *repeat != HUF_repeat_none) { - size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, count, maxSymbolValue); - size_t const newSize = HUF_estimateCompressedSize(CTable, count, maxSymbolValue); - if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) { - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, oldHufTable); - } - } - /* Use the new table */ - if (hSize + 12ul >= srcSize) { - return 0; - } - op += hSize; - if (repeat) { - *repeat = HUF_repeat_none; - } - if (oldHufTable) { - memcpy(oldHufTable, CTable, CTableSize); - } /* Save the new table */ - } - return HUF_compressCTable_internal(ostart, op, oend, src, srcSize, singleStream, CTable); -} - -size_t HUF_compress1X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, NULL, NULL, 0); -} - -size_t HUF_compress1X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize, hufTable, repeat, - preferRepeat); -} - -size_t HUF_compress4X_wksp(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, NULL, NULL, 0); -} - -size_t HUF_compress4X_repeat(void *dst, size_t dstSize, const void *src, size_t srcSize, unsigned maxSymbolValue, unsigned huffLog, void *workSpace, - size_t wkspSize, HUF_CElt *hufTable, HUF_repeat *repeat, int preferRepeat) -{ - return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize, hufTable, repeat, - preferRepeat); -} diff --git a/lib/zstd/huf_decompress.c b/lib/zstd/huf_decompress.c deleted file mode 100644 index 6526482047dc..000000000000 --- a/lib/zstd/huf_decompress.c +++ /dev/null @@ -1,960 +0,0 @@ -/* - * Huffman decoder, part of New Generation Entropy library - * Copyright (C) 2013-2016, Yann Collet. - * - * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above - * copyright notice, this list of conditions and the following disclaimer - * in the documentation and/or other materials provided with the - * distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - * - * You can contact the author at : - * - Source repository : https://github.com/Cyan4973/FiniteStateEntropy - */ - -/* ************************************************************** -* Compiler specifics -****************************************************************/ -#define FORCE_INLINE static __always_inline - -/* ************************************************************** -* Dependencies -****************************************************************/ -#include "bitstream.h" /* BIT_* */ -#include "fse.h" /* header compression */ -#include "huf.h" -#include -#include -#include /* memcpy, memset */ - -/* ************************************************************** -* Error Management -****************************************************************/ -#define HUF_STATIC_ASSERT(c) \ - { \ - enum { HUF_static_assert = 1 / (int)(!!(c)) }; \ - } /* use only *after* variable declarations */ - -/*-***************************/ -/* generic DTableDesc */ -/*-***************************/ - -typedef struct { - BYTE maxTableLog; - BYTE tableType; - BYTE tableLog; - BYTE reserved; -} DTableDesc; - -static DTableDesc HUF_getDTableDesc(const HUF_DTable *table) -{ - DTableDesc dtd; - memcpy(&dtd, table, sizeof(dtd)); - return dtd; -} - -/*-***************************/ -/* single-symbol decoding */ -/*-***************************/ - -typedef struct { - BYTE byte; - BYTE nbBits; -} HUF_DEltX2; /* single-symbol decoding */ - -size_t HUF_readDTableX2_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 tableLog = 0; - U32 nbSymbols = 0; - size_t iSize; - void *const dtPtr = DTable + 1; - HUF_DEltX2 *const dt = (HUF_DEltX2 *)dtPtr; - - U32 *rankVal; - BYTE *huffWeight; - size_t spaceUsed32 = 0; - - rankVal = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_ABSOLUTEMAX + 1; - huffWeight = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable)); - /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats_wksp(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (HUF_isError(iSize)) - return iSize; - - /* Table header */ - { - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (tableLog > (U32)(dtd.maxTableLog + 1)) - return ERROR(tableLog_tooLarge); /* DTable too small, Huffman tree cannot fit in */ - dtd.tableType = 0; - dtd.tableLog = (BYTE)tableLog; - memcpy(DTable, &dtd, sizeof(dtd)); - } - - /* Calculate starting value for each rank */ - { - U32 n, nextRankStart = 0; - for (n = 1; n < tableLog + 1; n++) { - U32 const curr = nextRankStart; - nextRankStart += (rankVal[n] << (n - 1)); - rankVal[n] = curr; - } - } - - /* fill DTable */ - { - U32 n; - for (n = 0; n < nbSymbols; n++) { - U32 const w = huffWeight[n]; - U32 const length = (1 << w) >> 1; - U32 u; - HUF_DEltX2 D; - D.byte = (BYTE)n; - D.nbBits = (BYTE)(tableLog + 1 - w); - for (u = rankVal[w]; u < rankVal[w] + length; u++) - dt[u] = D; - rankVal[w] += length; - } - } - - return iSize; -} - -static BYTE HUF_decodeSymbolX2(BIT_DStream_t *Dstream, const HUF_DEltX2 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */ - BYTE const c = dt[val].byte; - BIT_skipBits(Dstream, dt[val].nbBits); - return c; -} - -#define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \ - if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \ - if (ZSTD_64bits()) \ - HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) - -FORCE_INLINE size_t HUF_decodeStreamX2(BYTE *p, BIT_DStream_t *const bitDPtr, BYTE *const pEnd, const HUF_DEltX2 *const dt, const U32 dtLog) -{ - BYTE *const pStart = p; - - /* up to 4 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd - 4)) { - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_1(p, bitDPtr); - HUF_DECODE_SYMBOLX2_2(p, bitDPtr); - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - } - - /* closer to the end */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd)) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - /* no more data to retrieve from bitstream, hence no need to reload */ - while (p < pEnd) - HUF_DECODE_SYMBOLX2_0(p, bitDPtr); - - return pEnd - pStart; -} - -static size_t HUF_decompress1X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - BYTE *op = (BYTE *)dst; - BYTE *const oend = op + dstSize; - const void *dtPtr = DTable + 1; - const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; - BIT_DStream_t bitD; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - { - size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) - return errorCode; - } - - HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog); - - /* check */ - if (!BIT_endOfDStream(&bitD)) - return ERROR(corruption_detected); - - return dstSize; -} - -size_t HUF_decompress1X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) - return ERROR(GENERIC); - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -} - -static size_t HUF_decompress4X2_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - /* Check */ - if (cSrcSize < 10) - return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE *const istart = (const BYTE *)cSrc; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; - const HUF_DEltX2 *const dt = (const HUF_DEltX2 *)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = ZSTD_readLE16(istart); - size_t const length2 = ZSTD_readLE16(istart + 2); - size_t const length3 = ZSTD_readLE16(istart + 4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE *const istart1 = istart + 6; /* jumpTable */ - const BYTE *const istart2 = istart1 + length1; - const BYTE *const istart3 = istart2 + length2; - const BYTE *const istart4 = istart3 + length3; - const size_t segmentSize = (dstSize + 3) / 4; - BYTE *const opStart2 = ostart + segmentSize; - BYTE *const opStart3 = opStart2 + segmentSize; - BYTE *const opStart4 = opStart3 + segmentSize; - BYTE *op1 = ostart; - BYTE *op2 = opStart2; - BYTE *op3 = opStart3; - BYTE *op4 = opStart4; - U32 endSignal; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) - return ERROR(corruption_detected); /* overflow */ - { - size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for (; (endSignal == BIT_DStream_unfinished) && (op4 < (oend - 7));) { - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_1(op1, &bitD1); - HUF_DECODE_SYMBOLX2_1(op2, &bitD2); - HUF_DECODE_SYMBOLX2_1(op3, &bitD3); - HUF_DECODE_SYMBOLX2_1(op4, &bitD4); - HUF_DECODE_SYMBOLX2_2(op1, &bitD1); - HUF_DECODE_SYMBOLX2_2(op2, &bitD2); - HUF_DECODE_SYMBOLX2_2(op3, &bitD3); - HUF_DECODE_SYMBOLX2_2(op4, &bitD4); - HUF_DECODE_SYMBOLX2_0(op1, &bitD1); - HUF_DECODE_SYMBOLX2_0(op2, &bitD2); - HUF_DECODE_SYMBOLX2_0(op3, &bitD3); - HUF_DECODE_SYMBOLX2_0(op4, &bitD4); - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) - return ERROR(corruption_detected); - if (op2 > opStart3) - return ERROR(corruption_detected); - if (op3 > opStart4) - return ERROR(corruption_detected); - /* note : op4 supposed already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog); - - /* check */ - endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endSignal) - return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; - } -} - -size_t HUF_decompress4X2_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 0) - return ERROR(GENERIC); - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); -} - -/* *************************/ -/* double-symbols decoding */ -/* *************************/ -typedef struct { - U16 sequence; - BYTE nbBits; - BYTE length; -} HUF_DEltX4; /* double-symbols decoding */ - -typedef struct { - BYTE symbol; - BYTE weight; -} sortedSymbol_t; - -/* HUF_fillDTableX4Level2() : - * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ -static void HUF_fillDTableX4Level2(HUF_DEltX4 *DTable, U32 sizeLog, const U32 consumed, const U32 *rankValOrigin, const int minWeight, - const sortedSymbol_t *sortedSymbols, const U32 sortedListSize, U32 nbBitsBaseline, U16 baseSeq) -{ - HUF_DEltX4 DElt; - U32 rankVal[HUF_TABLELOG_MAX + 1]; - - /* get pre-calculated rankVal */ - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill skipped values */ - if (minWeight > 1) { - U32 i, skipSize = rankVal[minWeight]; - ZSTD_writeLE16(&(DElt.sequence), baseSeq); - DElt.nbBits = (BYTE)(consumed); - DElt.length = 1; - for (i = 0; i < skipSize; i++) - DTable[i] = DElt; - } - - /* fill DTable */ - { - U32 s; - for (s = 0; s < sortedListSize; s++) { /* note : sortedSymbols already skipped */ - const U32 symbol = sortedSymbols[s].symbol; - const U32 weight = sortedSymbols[s].weight; - const U32 nbBits = nbBitsBaseline - weight; - const U32 length = 1 << (sizeLog - nbBits); - const U32 start = rankVal[weight]; - U32 i = start; - const U32 end = start + length; - - ZSTD_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8))); - DElt.nbBits = (BYTE)(nbBits + consumed); - DElt.length = 2; - do { - DTable[i++] = DElt; - } while (i < end); /* since length >= 1 */ - - rankVal[weight] += length; - } - } -} - -typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; -typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1]; - -static void HUF_fillDTableX4(HUF_DEltX4 *DTable, const U32 targetLog, const sortedSymbol_t *sortedList, const U32 sortedListSize, const U32 *rankStart, - rankVal_t rankValOrigin, const U32 maxWeight, const U32 nbBitsBaseline) -{ - U32 rankVal[HUF_TABLELOG_MAX + 1]; - const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ - const U32 minBits = nbBitsBaseline - maxWeight; - U32 s; - - memcpy(rankVal, rankValOrigin, sizeof(rankVal)); - - /* fill DTable */ - for (s = 0; s < sortedListSize; s++) { - const U16 symbol = sortedList[s].symbol; - const U32 weight = sortedList[s].weight; - const U32 nbBits = nbBitsBaseline - weight; - const U32 start = rankVal[weight]; - const U32 length = 1 << (targetLog - nbBits); - - if (targetLog - nbBits >= minBits) { /* enough room for a second symbol */ - U32 sortedRank; - int minWeight = nbBits + scaleLog; - if (minWeight < 1) - minWeight = 1; - sortedRank = rankStart[minWeight]; - HUF_fillDTableX4Level2(DTable + start, targetLog - nbBits, nbBits, rankValOrigin[nbBits], minWeight, sortedList + sortedRank, - sortedListSize - sortedRank, nbBitsBaseline, symbol); - } else { - HUF_DEltX4 DElt; - ZSTD_writeLE16(&(DElt.sequence), symbol); - DElt.nbBits = (BYTE)(nbBits); - DElt.length = 1; - { - U32 const end = start + length; - U32 u; - for (u = start; u < end; u++) - DTable[u] = DElt; - } - } - rankVal[weight] += length; - } -} - -size_t HUF_readDTableX4_wksp(HUF_DTable *DTable, const void *src, size_t srcSize, void *workspace, size_t workspaceSize) -{ - U32 tableLog, maxW, sizeOfSort, nbSymbols; - DTableDesc dtd = HUF_getDTableDesc(DTable); - U32 const maxTableLog = dtd.maxTableLog; - size_t iSize; - void *dtPtr = DTable + 1; /* force compiler to avoid strict-aliasing */ - HUF_DEltX4 *const dt = (HUF_DEltX4 *)dtPtr; - U32 *rankStart; - - rankValCol_t *rankVal; - U32 *rankStats; - U32 *rankStart0; - sortedSymbol_t *sortedSymbol; - BYTE *weightList; - size_t spaceUsed32 = 0; - - HUF_STATIC_ASSERT((sizeof(rankValCol_t) & 3) == 0); - - rankVal = (rankValCol_t *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += (sizeof(rankValCol_t) * HUF_TABLELOG_MAX) >> 2; - rankStats = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 1; - rankStart0 = (U32 *)workspace + spaceUsed32; - spaceUsed32 += HUF_TABLELOG_MAX + 2; - sortedSymbol = (sortedSymbol_t *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(sizeof(sortedSymbol_t) * (HUF_SYMBOLVALUE_MAX + 1), sizeof(U32)) >> 2; - weightList = (BYTE *)((U32 *)workspace + spaceUsed32); - spaceUsed32 += ALIGN(HUF_SYMBOLVALUE_MAX + 1, sizeof(U32)) >> 2; - - if ((spaceUsed32 << 2) > workspaceSize) - return ERROR(tableLog_tooLarge); - workspace = (U32 *)workspace + spaceUsed32; - workspaceSize -= (spaceUsed32 << 2); - - rankStart = rankStart0 + 1; - memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1)); - - HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */ - if (maxTableLog > HUF_TABLELOG_MAX) - return ERROR(tableLog_tooLarge); - /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ - - iSize = HUF_readStats_wksp(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize, workspace, workspaceSize); - if (HUF_isError(iSize)) - return iSize; - - /* check result */ - if (tableLog > maxTableLog) - return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */ - - /* find maxWeight */ - for (maxW = tableLog; rankStats[maxW] == 0; maxW--) { - } /* necessarily finds a solution before 0 */ - - /* Get start index of each weight */ - { - U32 w, nextRankStart = 0; - for (w = 1; w < maxW + 1; w++) { - U32 curr = nextRankStart; - nextRankStart += rankStats[w]; - rankStart[w] = curr; - } - rankStart[0] = nextRankStart; /* put all 0w symbols at the end of sorted list*/ - sizeOfSort = nextRankStart; - } - - /* sort symbols by weight */ - { - U32 s; - for (s = 0; s < nbSymbols; s++) { - U32 const w = weightList[s]; - U32 const r = rankStart[w]++; - sortedSymbol[r].symbol = (BYTE)s; - sortedSymbol[r].weight = (BYTE)w; - } - rankStart[0] = 0; /* forget 0w symbols; this is beginning of weight(1) */ - } - - /* Build rankVal */ - { - U32 *const rankVal0 = rankVal[0]; - { - int const rescale = (maxTableLog - tableLog) - 1; /* tableLog <= maxTableLog */ - U32 nextRankVal = 0; - U32 w; - for (w = 1; w < maxW + 1; w++) { - U32 curr = nextRankVal; - nextRankVal += rankStats[w] << (w + rescale); - rankVal0[w] = curr; - } - } - { - U32 const minBits = tableLog + 1 - maxW; - U32 consumed; - for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) { - U32 *const rankValPtr = rankVal[consumed]; - U32 w; - for (w = 1; w < maxW + 1; w++) { - rankValPtr[w] = rankVal0[w] >> consumed; - } - } - } - } - - HUF_fillDTableX4(dt, maxTableLog, sortedSymbol, sizeOfSort, rankStart0, rankVal, maxW, tableLog + 1); - - dtd.tableLog = (BYTE)maxTableLog; - dtd.tableType = 1; - memcpy(DTable, &dtd, sizeof(dtd)); - return iSize; -} - -static U32 HUF_decodeSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt + val, 2); - BIT_skipBits(DStream, dt[val].nbBits); - return dt[val].length; -} - -static U32 HUF_decodeLastSymbolX4(void *op, BIT_DStream_t *DStream, const HUF_DEltX4 *dt, const U32 dtLog) -{ - size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */ - memcpy(op, dt + val, 1); - if (dt[val].length == 1) - BIT_skipBits(DStream, dt[val].nbBits); - else { - if (DStream->bitsConsumed < (sizeof(DStream->bitContainer) * 8)) { - BIT_skipBits(DStream, dt[val].nbBits); - if (DStream->bitsConsumed > (sizeof(DStream->bitContainer) * 8)) - /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */ - DStream->bitsConsumed = (sizeof(DStream->bitContainer) * 8); - } - } - return 1; -} - -#define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \ - if (ZSTD_64bits() || (HUF_TABLELOG_MAX <= 12)) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -#define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \ - if (ZSTD_64bits()) \ - ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog) - -FORCE_INLINE size_t HUF_decodeStreamX4(BYTE *p, BIT_DStream_t *bitDPtr, BYTE *const pEnd, const HUF_DEltX4 *const dt, const U32 dtLog) -{ - BYTE *const pStart = p; - - /* up to 8 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd - (sizeof(bitDPtr->bitContainer) - 1))) { - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_1(p, bitDPtr); - HUF_DECODE_SYMBOLX4_2(p, bitDPtr); - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - } - - /* closer to end : up to 2 symbols at a time */ - while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd - 2)) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); - - while (p <= pEnd - 2) - HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */ - - if (p < pEnd) - p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog); - - return p - pStart; -} - -static size_t HUF_decompress1X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - BIT_DStream_t bitD; - - /* Init */ - { - size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* decode */ - { - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; /* force compiler to not use strict-aliasing */ - const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog); - } - - /* check */ - if (!BIT_endOfDStream(&bitD)) - return ERROR(corruption_detected); - - /* decoded size */ - return dstSize; -} - -size_t HUF_decompress1X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) - return ERROR(GENERIC); - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable *DCtx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx); -} - -static size_t HUF_decompress4X4_usingDTable_internal(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - if (cSrcSize < 10) - return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */ - - { - const BYTE *const istart = (const BYTE *)cSrc; - BYTE *const ostart = (BYTE *)dst; - BYTE *const oend = ostart + dstSize; - const void *const dtPtr = DTable + 1; - const HUF_DEltX4 *const dt = (const HUF_DEltX4 *)dtPtr; - - /* Init */ - BIT_DStream_t bitD1; - BIT_DStream_t bitD2; - BIT_DStream_t bitD3; - BIT_DStream_t bitD4; - size_t const length1 = ZSTD_readLE16(istart); - size_t const length2 = ZSTD_readLE16(istart + 2); - size_t const length3 = ZSTD_readLE16(istart + 4); - size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6); - const BYTE *const istart1 = istart + 6; /* jumpTable */ - const BYTE *const istart2 = istart1 + length1; - const BYTE *const istart3 = istart2 + length2; - const BYTE *const istart4 = istart3 + length3; - size_t const segmentSize = (dstSize + 3) / 4; - BYTE *const opStart2 = ostart + segmentSize; - BYTE *const opStart3 = opStart2 + segmentSize; - BYTE *const opStart4 = opStart3 + segmentSize; - BYTE *op1 = ostart; - BYTE *op2 = opStart2; - BYTE *op3 = opStart3; - BYTE *op4 = opStart4; - U32 endSignal; - DTableDesc const dtd = HUF_getDTableDesc(DTable); - U32 const dtLog = dtd.tableLog; - - if (length4 > cSrcSize) - return ERROR(corruption_detected); /* overflow */ - { - size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3); - if (HUF_isError(errorCode)) - return errorCode; - } - { - size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4); - if (HUF_isError(errorCode)) - return errorCode; - } - - /* 16-32 symbols per loop (4-8 symbols per stream) */ - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - for (; (endSignal == BIT_DStream_unfinished) & (op4 < (oend - (sizeof(bitD4.bitContainer) - 1)));) { - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_1(op1, &bitD1); - HUF_DECODE_SYMBOLX4_1(op2, &bitD2); - HUF_DECODE_SYMBOLX4_1(op3, &bitD3); - HUF_DECODE_SYMBOLX4_1(op4, &bitD4); - HUF_DECODE_SYMBOLX4_2(op1, &bitD1); - HUF_DECODE_SYMBOLX4_2(op2, &bitD2); - HUF_DECODE_SYMBOLX4_2(op3, &bitD3); - HUF_DECODE_SYMBOLX4_2(op4, &bitD4); - HUF_DECODE_SYMBOLX4_0(op1, &bitD1); - HUF_DECODE_SYMBOLX4_0(op2, &bitD2); - HUF_DECODE_SYMBOLX4_0(op3, &bitD3); - HUF_DECODE_SYMBOLX4_0(op4, &bitD4); - - endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4); - } - - /* check corruption */ - if (op1 > opStart2) - return ERROR(corruption_detected); - if (op2 > opStart3) - return ERROR(corruption_detected); - if (op3 > opStart4) - return ERROR(corruption_detected); - /* note : op4 already verified within main loop */ - - /* finish bitStreams one by one */ - HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog); - HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog); - HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog); - HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog); - - /* check */ - { - U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4); - if (!endCheck) - return ERROR(corruption_detected); - } - - /* decoded size */ - return dstSize; - } -} - -size_t HUF_decompress4X4_usingDTable(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc dtd = HUF_getDTableDesc(DTable); - if (dtd.tableType != 1) - return ERROR(GENERIC); - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - const BYTE *ip = (const BYTE *)cSrc; - - size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize, workspace, workspaceSize); - if (HUF_isError(hSize)) - return hSize; - if (hSize >= cSrcSize) - return ERROR(srcSize_wrong); - ip += hSize; - cSrcSize -= hSize; - - return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx); -} - -/* ********************************/ -/* Generic decompression selector */ -/* ********************************/ - -size_t HUF_decompress1X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) - : HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); -} - -size_t HUF_decompress4X_usingDTable(void *dst, size_t maxDstSize, const void *cSrc, size_t cSrcSize, const HUF_DTable *DTable) -{ - DTableDesc const dtd = HUF_getDTableDesc(DTable); - return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) - : HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable); -} - -typedef struct { - U32 tableTime; - U32 decode256Time; -} algo_time_t; -static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] = { - /* single, double, quad */ - {{0, 0}, {1, 1}, {2, 2}}, /* Q==0 : impossible */ - {{0, 0}, {1, 1}, {2, 2}}, /* Q==1 : impossible */ - {{38, 130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */ - {{448, 128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */ - {{556, 128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */ - {{714, 128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */ - {{883, 128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */ - {{897, 128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */ - {{926, 128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */ - {{947, 128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */ - {{1107, 128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */ - {{1177, 128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */ - {{1242, 128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */ - {{1349, 128}, {2644, 106}, {5260, 106}}, /* Q ==13 : 81-87% */ - {{1455, 128}, {2422, 124}, {4174, 124}}, /* Q ==14 : 87-93% */ - {{722, 128}, {1891, 145}, {1936, 146}}, /* Q ==15 : 93-99% */ -}; - -/** HUF_selectDecoder() : -* Tells which decoder is likely to decode faster, -* based on a set of pre-determined metrics. -* @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 . -* Assumption : 0 < cSrcSize < dstSize <= 128 KB */ -U32 HUF_selectDecoder(size_t dstSize, size_t cSrcSize) -{ - /* decoder timing evaluation */ - U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */ - U32 const D256 = (U32)(dstSize >> 8); - U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256); - U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256); - DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */ - - return DTime1 < DTime0; -} - -typedef size_t (*decompressionAlgo)(void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize); - -size_t HUF_decompress4X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) - return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { - memcpy(dst, cSrc, dstSize); - return dstSize; - } /* not compressed */ - if (cSrcSize == 1) { - memset(dst, *(const BYTE *)cSrc, dstSize); - return dstSize; - } /* RLE */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} - -size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) - return ERROR(corruption_detected); /* invalid */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} - -size_t HUF_decompress1X_DCtx_wksp(HUF_DTable *dctx, void *dst, size_t dstSize, const void *cSrc, size_t cSrcSize, void *workspace, size_t workspaceSize) -{ - /* validation checks */ - if (dstSize == 0) - return ERROR(dstSize_tooSmall); - if (cSrcSize > dstSize) - return ERROR(corruption_detected); /* invalid */ - if (cSrcSize == dstSize) { - memcpy(dst, cSrc, dstSize); - return dstSize; - } /* not compressed */ - if (cSrcSize == 1) { - memset(dst, *(const BYTE *)cSrc, dstSize); - return dstSize; - } /* RLE */ - - { - U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize); - return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize) - : HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workspace, workspaceSize); - } -} diff --git a/lib/zstd/mem.h b/lib/zstd/mem.h deleted file mode 100644 index 93d7a2c377fe..000000000000 --- a/lib/zstd/mem.h +++ /dev/null @@ -1,151 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -#ifndef MEM_H_MODULE -#define MEM_H_MODULE - -/*-**************************************** -* Dependencies -******************************************/ -#include -#include /* memcpy */ -#include /* size_t, ptrdiff_t */ - -/*-**************************************** -* Compiler specifics -******************************************/ -#define ZSTD_STATIC static inline - -/*-************************************************************** -* Basic Types -*****************************************************************/ -typedef uint8_t BYTE; -typedef uint16_t U16; -typedef int16_t S16; -typedef uint32_t U32; -typedef int32_t S32; -typedef uint64_t U64; -typedef int64_t S64; -typedef ptrdiff_t iPtrDiff; -typedef uintptr_t uPtrDiff; - -/*-************************************************************** -* Memory I/O -*****************************************************************/ -ZSTD_STATIC unsigned ZSTD_32bits(void) { return sizeof(size_t) == 4; } -ZSTD_STATIC unsigned ZSTD_64bits(void) { return sizeof(size_t) == 8; } - -#if defined(__LITTLE_ENDIAN) -#define ZSTD_LITTLE_ENDIAN 1 -#else -#define ZSTD_LITTLE_ENDIAN 0 -#endif - -ZSTD_STATIC unsigned ZSTD_isLittleEndian(void) { return ZSTD_LITTLE_ENDIAN; } - -ZSTD_STATIC U16 ZSTD_read16(const void *memPtr) { return get_unaligned((const U16 *)memPtr); } - -ZSTD_STATIC U32 ZSTD_read32(const void *memPtr) { return get_unaligned((const U32 *)memPtr); } - -ZSTD_STATIC U64 ZSTD_read64(const void *memPtr) { return get_unaligned((const U64 *)memPtr); } - -ZSTD_STATIC size_t ZSTD_readST(const void *memPtr) { return get_unaligned((const size_t *)memPtr); } - -ZSTD_STATIC void ZSTD_write16(void *memPtr, U16 value) { put_unaligned(value, (U16 *)memPtr); } - -ZSTD_STATIC void ZSTD_write32(void *memPtr, U32 value) { put_unaligned(value, (U32 *)memPtr); } - -ZSTD_STATIC void ZSTD_write64(void *memPtr, U64 value) { put_unaligned(value, (U64 *)memPtr); } - -/*=== Little endian r/w ===*/ - -ZSTD_STATIC U16 ZSTD_readLE16(const void *memPtr) { return get_unaligned_le16(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE16(void *memPtr, U16 val) { put_unaligned_le16(val, memPtr); } - -ZSTD_STATIC U32 ZSTD_readLE24(const void *memPtr) { return ZSTD_readLE16(memPtr) + (((const BYTE *)memPtr)[2] << 16); } - -ZSTD_STATIC void ZSTD_writeLE24(void *memPtr, U32 val) -{ - ZSTD_writeLE16(memPtr, (U16)val); - ((BYTE *)memPtr)[2] = (BYTE)(val >> 16); -} - -ZSTD_STATIC U32 ZSTD_readLE32(const void *memPtr) { return get_unaligned_le32(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE32(void *memPtr, U32 val32) { put_unaligned_le32(val32, memPtr); } - -ZSTD_STATIC U64 ZSTD_readLE64(const void *memPtr) { return get_unaligned_le64(memPtr); } - -ZSTD_STATIC void ZSTD_writeLE64(void *memPtr, U64 val64) { put_unaligned_le64(val64, memPtr); } - -ZSTD_STATIC size_t ZSTD_readLEST(const void *memPtr) -{ - if (ZSTD_32bits()) - return (size_t)ZSTD_readLE32(memPtr); - else - return (size_t)ZSTD_readLE64(memPtr); -} - -ZSTD_STATIC void ZSTD_writeLEST(void *memPtr, size_t val) -{ - if (ZSTD_32bits()) - ZSTD_writeLE32(memPtr, (U32)val); - else - ZSTD_writeLE64(memPtr, (U64)val); -} - -/*=== Big endian r/w ===*/ - -ZSTD_STATIC U32 ZSTD_readBE32(const void *memPtr) { return get_unaligned_be32(memPtr); } - -ZSTD_STATIC void ZSTD_writeBE32(void *memPtr, U32 val32) { put_unaligned_be32(val32, memPtr); } - -ZSTD_STATIC U64 ZSTD_readBE64(const void *memPtr) { return get_unaligned_be64(memPtr); } - -ZSTD_STATIC void ZSTD_writeBE64(void *memPtr, U64 val64) { put_unaligned_be64(val64, memPtr); } - -ZSTD_STATIC size_t ZSTD_readBEST(const void *memPtr) -{ - if (ZSTD_32bits()) - return (size_t)ZSTD_readBE32(memPtr); - else - return (size_t)ZSTD_readBE64(memPtr); -} - -ZSTD_STATIC void ZSTD_writeBEST(void *memPtr, size_t val) -{ - if (ZSTD_32bits()) - ZSTD_writeBE32(memPtr, (U32)val); - else - ZSTD_writeBE64(memPtr, (U64)val); -} - -/* function safe only for comparisons */ -ZSTD_STATIC U32 ZSTD_readMINMATCH(const void *memPtr, U32 length) -{ - switch (length) { - default: - case 4: return ZSTD_read32(memPtr); - case 3: - if (ZSTD_isLittleEndian()) - return ZSTD_read32(memPtr) << 8; - else - return ZSTD_read32(memPtr) >> 8; - } -} - -#endif /* MEM_H_MODULE */ diff --git a/lib/zstd/zstd_common.c b/lib/zstd/zstd_common.c deleted file mode 100644 index a282624ee155..000000000000 --- a/lib/zstd/zstd_common.c +++ /dev/null @@ -1,75 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/*-************************************* -* Dependencies -***************************************/ -#include "error_private.h" -#include "zstd_internal.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ -#include - -/*=************************************************************** -* Custom allocator -****************************************************************/ - -#define stack_push(stack, size) \ - ({ \ - void *const ptr = ZSTD_PTR_ALIGN((stack)->ptr); \ - (stack)->ptr = (char *)ptr + (size); \ - (stack)->ptr <= (stack)->end ? ptr : NULL; \ - }) - -ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize) -{ - ZSTD_customMem stackMem = {ZSTD_stackAlloc, ZSTD_stackFree, workspace}; - ZSTD_stack *stack = (ZSTD_stack *)workspace; - /* Verify preconditions */ - if (!workspace || workspaceSize < sizeof(ZSTD_stack) || workspace != ZSTD_PTR_ALIGN(workspace)) { - ZSTD_customMem error = {NULL, NULL, NULL}; - return error; - } - /* Initialize the stack */ - stack->ptr = workspace; - stack->end = (char *)workspace + workspaceSize; - stack_push(stack, sizeof(ZSTD_stack)); - return stackMem; -} - -void *ZSTD_stackAllocAll(void *opaque, size_t *size) -{ - ZSTD_stack *stack = (ZSTD_stack *)opaque; - *size = (BYTE const *)stack->end - (BYTE *)ZSTD_PTR_ALIGN(stack->ptr); - return stack_push(stack, *size); -} - -void *ZSTD_stackAlloc(void *opaque, size_t size) -{ - ZSTD_stack *stack = (ZSTD_stack *)opaque; - return stack_push(stack, size); -} -void ZSTD_stackFree(void *opaque, void *address) -{ - (void)opaque; - (void)address; -} - -void *ZSTD_malloc(size_t size, ZSTD_customMem customMem) { return customMem.customAlloc(customMem.opaque, size); } - -void ZSTD_free(void *ptr, ZSTD_customMem customMem) -{ - if (ptr != NULL) - customMem.customFree(customMem.opaque, ptr); -} diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c new file mode 100644 index 000000000000..37d08ff43e6e --- /dev/null +++ b/lib/zstd/zstd_compress_module.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#include "common/zstd_deps.h" +#include "common/zstd_internal.h" + +int zstd_min_clevel(void) +{ + return ZSTD_minCLevel(); +} +EXPORT_SYMBOL(zstd_min_clevel); + +int zstd_max_clevel(void) +{ + return ZSTD_maxCLevel(); +} +EXPORT_SYMBOL(zstd_max_clevel); + +size_t zstd_compress_bound(size_t src_size) +{ + return ZSTD_compressBound(src_size); +} +EXPORT_SYMBOL(zstd_compress_bound); + +zstd_parameters zstd_get_params(int level, + unsigned long long estimated_src_size) +{ + return ZSTD_getParams(level, estimated_src_size, 0); +} +EXPORT_SYMBOL(zstd_get_params); + +size_t zstd_cctx_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCCtxSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cctx_workspace_bound); + +zstd_cctx *zstd_init_cctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticCCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_cctx); + +size_t zstd_compress_cctx(zstd_cctx *cctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size, const zstd_parameters *parameters) +{ + return ZSTD_compress_advanced(cctx, dst, dst_capacity, src, src_size, NULL, 0, *parameters); +} +EXPORT_SYMBOL(zstd_compress_cctx); + +size_t zstd_cstream_workspace_bound(const zstd_compression_parameters *cparams) +{ + return ZSTD_estimateCStreamSize_usingCParams(*cparams); +} +EXPORT_SYMBOL(zstd_cstream_workspace_bound); + +zstd_cstream *zstd_init_cstream(const zstd_parameters *parameters, + unsigned long long pledged_src_size, void *workspace, size_t workspace_size) +{ + zstd_cstream *cstream; + size_t ret; + + if (workspace == NULL) + return NULL; + + cstream = ZSTD_initStaticCStream(workspace, workspace_size); + if (cstream == NULL) + return NULL; + + /* 0 means unknown in linux zstd API but means 0 in new zstd API */ + if (pledged_src_size == 0) + pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN; + + ret = ZSTD_initCStream_advanced(cstream, NULL, 0, *parameters, pledged_src_size); + if (ZSTD_isError(ret)) + return NULL; + + return cstream; +} +EXPORT_SYMBOL(zstd_init_cstream); + +size_t zstd_reset_cstream(zstd_cstream *cstream, + unsigned long long pledged_src_size) +{ + return ZSTD_resetCStream(cstream, pledged_src_size); +} +EXPORT_SYMBOL(zstd_reset_cstream); + +size_t zstd_compress_stream(zstd_cstream *cstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_compressStream(cstream, output, input); +} +EXPORT_SYMBOL(zstd_compress_stream); + +size_t zstd_flush_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_flushStream(cstream, output); +} +EXPORT_SYMBOL(zstd_flush_stream); + +size_t zstd_end_stream(zstd_cstream *cstream, zstd_out_buffer *output) +{ + return ZSTD_endStream(cstream, output); +} +EXPORT_SYMBOL(zstd_end_stream); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Compressor"); diff --git a/lib/zstd/zstd_decompress_module.c b/lib/zstd/zstd_decompress_module.c new file mode 100644 index 000000000000..15005cdb9eca --- /dev/null +++ b/lib/zstd/zstd_decompress_module.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Facebook, Inc. + * All rights reserved. + * + * This source code is licensed under both the BSD-style license (found in the + * LICENSE file in the root directory of this source tree) and the GPLv2 (found + * in the COPYING file in the root directory of this source tree). + * You may select, at your option, one of the above-listed licenses. + */ + +#include +#include +#include +#include + +#include "common/zstd_deps.h" + +/* Common symbols. zstd_compress must depend on zstd_decompress. */ + +unsigned int zstd_is_error(size_t code) +{ + return ZSTD_isError(code); +} +EXPORT_SYMBOL(zstd_is_error); + +zstd_error_code zstd_get_error_code(size_t code) +{ + return ZSTD_getErrorCode(code); +} +EXPORT_SYMBOL(zstd_get_error_code); + +const char *zstd_get_error_name(size_t code) +{ + return ZSTD_getErrorName(code); +} +EXPORT_SYMBOL(zstd_get_error_name); + +/* Decompression symbols. */ + +size_t zstd_dctx_workspace_bound(void) +{ + return ZSTD_estimateDCtxSize(); +} +EXPORT_SYMBOL(zstd_dctx_workspace_bound); + +zstd_dctx *zstd_init_dctx(void *workspace, size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + return ZSTD_initStaticDCtx(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dctx); + +size_t zstd_decompress_dctx(zstd_dctx *dctx, void *dst, size_t dst_capacity, + const void *src, size_t src_size) +{ + return ZSTD_decompressDCtx(dctx, dst, dst_capacity, src, src_size); +} +EXPORT_SYMBOL(zstd_decompress_dctx); + +size_t zstd_dstream_workspace_bound(size_t max_window_size) +{ + return ZSTD_estimateDStreamSize(max_window_size); +} +EXPORT_SYMBOL(zstd_dstream_workspace_bound); + +zstd_dstream *zstd_init_dstream(size_t max_window_size, void *workspace, + size_t workspace_size) +{ + if (workspace == NULL) + return NULL; + (void)max_window_size; + return ZSTD_initStaticDStream(workspace, workspace_size); +} +EXPORT_SYMBOL(zstd_init_dstream); + +size_t zstd_reset_dstream(zstd_dstream *dstream) +{ + return ZSTD_resetDStream(dstream); +} +EXPORT_SYMBOL(zstd_reset_dstream); + +size_t zstd_decompress_stream(zstd_dstream *dstream, zstd_out_buffer *output, + zstd_in_buffer *input) +{ + return ZSTD_decompressStream(dstream, output, input); +} +EXPORT_SYMBOL(zstd_decompress_stream); + +size_t zstd_find_frame_compressed_size(const void *src, size_t src_size) +{ + return ZSTD_findFrameCompressedSize(src, src_size); +} +EXPORT_SYMBOL(zstd_find_frame_compressed_size); + +size_t zstd_get_frame_header(zstd_frame_header *header, const void *src, + size_t src_size) +{ + return ZSTD_getFrameHeader(header, src, src_size); +} +EXPORT_SYMBOL(zstd_get_frame_header); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("Zstd Decompressor"); diff --git a/lib/zstd/zstd_internal.h b/lib/zstd/zstd_internal.h deleted file mode 100644 index dac753397f86..000000000000 --- a/lib/zstd/zstd_internal.h +++ /dev/null @@ -1,273 +0,0 @@ -/** - * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -#ifndef ZSTD_CCOMMON_H_MODULE -#define ZSTD_CCOMMON_H_MODULE - -/*-******************************************************* -* Compiler specifics -*********************************************************/ -#define FORCE_INLINE static __always_inline -#define FORCE_NOINLINE static noinline - -/*-************************************* -* Dependencies -***************************************/ -#include "error_private.h" -#include "mem.h" -#include -#include -#include -#include - -/*-************************************* -* shared macros -***************************************/ -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define CHECK_F(f) \ - { \ - size_t const errcod = f; \ - if (ERR_isError(errcod)) \ - return errcod; \ - } /* check and Forward error code */ -#define CHECK_E(f, e) \ - { \ - size_t const errcod = f; \ - if (ERR_isError(errcod)) \ - return ERROR(e); \ - } /* check and send Error code */ -#define ZSTD_STATIC_ASSERT(c) \ - { \ - enum { ZSTD_static_assert = 1 / (int)(!!(c)) }; \ - } - -/*-************************************* -* Common constants -***************************************/ -#define ZSTD_OPT_NUM (1 << 12) -#define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */ - -#define ZSTD_REP_NUM 3 /* number of repcodes */ -#define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */ -#define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1) -#define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM) -static const U32 repStartValue[ZSTD_REP_NUM] = {1, 4, 8}; - -#define KB *(1 << 10) -#define MB *(1 << 20) -#define GB *(1U << 30) - -#define BIT7 128 -#define BIT6 64 -#define BIT5 32 -#define BIT4 16 -#define BIT1 2 -#define BIT0 1 - -#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10 -static const size_t ZSTD_fcs_fieldSize[4] = {0, 2, 4, 8}; -static const size_t ZSTD_did_fieldSize[4] = {0, 1, 2, 4}; - -#define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */ -static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE; -typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e; - -#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */ -#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */ - -#define HufLog 12 -typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e; - -#define LONGNBSEQ 0x7F00 - -#define MINMATCH 3 -#define EQUAL_READ32 4 - -#define Litbits 8 -#define MaxLit ((1 << Litbits) - 1) -#define MaxML 52 -#define MaxLL 35 -#define MaxOff 28 -#define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */ -#define MLFSELog 9 -#define LLFSELog 9 -#define OffFSELog 8 - -static const U32 LL_bits[MaxLL + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -static const S16 LL_defaultNorm[MaxLL + 1] = {4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, -1, -1, -1, -1}; -#define LL_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG; - -static const U32 ML_bits[MaxML + 1] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; -static const S16 ML_defaultNorm[MaxML + 1] = {1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1, -1}; -#define ML_DEFAULTNORMLOG 6 /* for static allocation */ -static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG; - -static const S16 OF_defaultNorm[MaxOff + 1] = {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}; -#define OF_DEFAULTNORMLOG 5 /* for static allocation */ -static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG; - -/*-******************************************* -* Shared functions to include for inlining -*********************************************/ -ZSTD_STATIC void ZSTD_copy8(void *dst, const void *src) { - /* - * zstd relies heavily on gcc being able to analyze and inline this - * memcpy() call, since it is called in a tight loop. Preboot mode - * is compiled in freestanding mode, which stops gcc from analyzing - * memcpy(). Use __builtin_memcpy() to tell gcc to analyze this as a - * regular memcpy(). - */ - __builtin_memcpy(dst, src, 8); -} -/*! ZSTD_wildcopy() : -* custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ -#define WILDCOPY_OVERLENGTH 8 -ZSTD_STATIC void ZSTD_wildcopy(void *dst, const void *src, ptrdiff_t length) -{ - const BYTE* ip = (const BYTE*)src; - BYTE* op = (BYTE*)dst; - BYTE* const oend = op + length; -#if defined(GCC_VERSION) && GCC_VERSION >= 70000 && GCC_VERSION < 70200 - /* - * Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81388. - * Avoid the bad case where the loop only runs once by handling the - * special case separately. This doesn't trigger the bug because it - * doesn't involve pointer/integer overflow. - */ - if (length <= 8) - return ZSTD_copy8(dst, src); -#endif - do { - ZSTD_copy8(op, ip); - op += 8; - ip += 8; - } while (op < oend); -} - -/*-******************************************* -* Private interfaces -*********************************************/ -typedef struct ZSTD_stats_s ZSTD_stats_t; - -typedef struct { - U32 off; - U32 len; -} ZSTD_match_t; - -typedef struct { - U32 price; - U32 off; - U32 mlen; - U32 litlen; - U32 rep[ZSTD_REP_NUM]; -} ZSTD_optimal_t; - -typedef struct seqDef_s { - U32 offset; - U16 litLength; - U16 matchLength; -} seqDef; - -typedef struct { - seqDef *sequencesStart; - seqDef *sequences; - BYTE *litStart; - BYTE *lit; - BYTE *llCode; - BYTE *mlCode; - BYTE *ofCode; - U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */ - U32 longLengthPos; - /* opt */ - ZSTD_optimal_t *priceTable; - ZSTD_match_t *matchTable; - U32 *matchLengthFreq; - U32 *litLengthFreq; - U32 *litFreq; - U32 *offCodeFreq; - U32 matchLengthSum; - U32 matchSum; - U32 litLengthSum; - U32 litSum; - U32 offCodeSum; - U32 log2matchLengthSum; - U32 log2matchSum; - U32 log2litLengthSum; - U32 log2litSum; - U32 log2offCodeSum; - U32 factor; - U32 staticPrices; - U32 cachedPrice; - U32 cachedLitLength; - const BYTE *cachedLiterals; -} seqStore_t; - -const seqStore_t *ZSTD_getSeqStore(const ZSTD_CCtx *ctx); -void ZSTD_seqToCodes(const seqStore_t *seqStorePtr); -int ZSTD_isSkipFrame(ZSTD_DCtx *dctx); - -/*= Custom memory allocation functions */ -typedef void *(*ZSTD_allocFunction)(void *opaque, size_t size); -typedef void (*ZSTD_freeFunction)(void *opaque, void *address); -typedef struct { - ZSTD_allocFunction customAlloc; - ZSTD_freeFunction customFree; - void *opaque; -} ZSTD_customMem; - -void *ZSTD_malloc(size_t size, ZSTD_customMem customMem); -void ZSTD_free(void *ptr, ZSTD_customMem customMem); - -/*====== stack allocation ======*/ - -typedef struct { - void *ptr; - const void *end; -} ZSTD_stack; - -#define ZSTD_ALIGN(x) ALIGN(x, sizeof(size_t)) -#define ZSTD_PTR_ALIGN(p) PTR_ALIGN(p, sizeof(size_t)) - -ZSTD_customMem ZSTD_initStack(void *workspace, size_t workspaceSize); - -void *ZSTD_stackAllocAll(void *opaque, size_t *size); -void *ZSTD_stackAlloc(void *opaque, size_t size); -void ZSTD_stackFree(void *opaque, void *address); - -/*====== common function ======*/ - -ZSTD_STATIC U32 ZSTD_highbit32(U32 val) { return 31 - __builtin_clz(val); } - -/* hidden functions */ - -/* ZSTD_invalidateRepCodes() : - * ensures next compression will not use repcodes from previous block. - * Note : only works with regular variant; - * do not use with extDict variant ! */ -void ZSTD_invalidateRepCodes(ZSTD_CCtx *cctx); - -size_t ZSTD_freeCCtx(ZSTD_CCtx *cctx); -size_t ZSTD_freeDCtx(ZSTD_DCtx *dctx); -size_t ZSTD_freeCDict(ZSTD_CDict *cdict); -size_t ZSTD_freeDDict(ZSTD_DDict *cdict); -size_t ZSTD_freeCStream(ZSTD_CStream *zcs); -size_t ZSTD_freeDStream(ZSTD_DStream *zds); - -#endif /* ZSTD_CCOMMON_H_MODULE */ diff --git a/lib/zstd/zstd_opt.h b/lib/zstd/zstd_opt.h deleted file mode 100644 index 55e1b4cba808..000000000000 --- a/lib/zstd/zstd_opt.h +++ /dev/null @@ -1,1014 +0,0 @@ -/** - * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc. - * All rights reserved. - * - * This source code is licensed under the BSD-style license found in the - * LICENSE file in the root directory of https://github.com/facebook/zstd. - * An additional grant of patent rights can be found in the PATENTS file in the - * same directory. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the GNU General Public License version 2 as published by the - * Free Software Foundation. This program is dual-licensed; you may select - * either version 2 of the GNU General Public License ("GPL") or BSD license - * ("BSD"). - */ - -/* Note : this file is intended to be included within zstd_compress.c */ - -#ifndef ZSTD_OPT_H_91842398743 -#define ZSTD_OPT_H_91842398743 - -#define ZSTD_LITFREQ_ADD 2 -#define ZSTD_FREQ_DIV 4 -#define ZSTD_MAX_PRICE (1 << 30) - -/*-************************************* -* Price functions for optimal parser -***************************************/ -FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t *ssPtr) -{ - ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum + 1); - ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum + 1); - ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum + 1); - ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum + 1); - ssPtr->factor = 1 + ((ssPtr->litSum >> 5) / ssPtr->litLengthSum) + ((ssPtr->litSum << 1) / (ssPtr->litSum + ssPtr->matchSum)); -} - -ZSTD_STATIC void ZSTD_rescaleFreqs(seqStore_t *ssPtr, const BYTE *src, size_t srcSize) -{ - unsigned u; - - ssPtr->cachedLiterals = NULL; - ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; - ssPtr->staticPrices = 0; - - if (ssPtr->litLengthSum == 0) { - if (srcSize <= 1024) - ssPtr->staticPrices = 1; - - for (u = 0; u <= MaxLit; u++) - ssPtr->litFreq[u] = 0; - for (u = 0; u < srcSize; u++) - ssPtr->litFreq[src[u]]++; - - ssPtr->litSum = 0; - ssPtr->litLengthSum = MaxLL + 1; - ssPtr->matchLengthSum = MaxML + 1; - ssPtr->offCodeSum = (MaxOff + 1); - ssPtr->matchSum = (ZSTD_LITFREQ_ADD << Litbits); - - for (u = 0; u <= MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u = 0; u <= MaxLL; u++) - ssPtr->litLengthFreq[u] = 1; - for (u = 0; u <= MaxML; u++) - ssPtr->matchLengthFreq[u] = 1; - for (u = 0; u <= MaxOff; u++) - ssPtr->offCodeFreq[u] = 1; - } else { - ssPtr->matchLengthSum = 0; - ssPtr->litLengthSum = 0; - ssPtr->offCodeSum = 0; - ssPtr->matchSum = 0; - ssPtr->litSum = 0; - - for (u = 0; u <= MaxLit; u++) { - ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u] >> (ZSTD_FREQ_DIV + 1)); - ssPtr->litSum += ssPtr->litFreq[u]; - } - for (u = 0; u <= MaxLL; u++) { - ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u] >> (ZSTD_FREQ_DIV + 1)); - ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; - } - for (u = 0; u <= MaxML; u++) { - ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; - ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); - } - ssPtr->matchSum *= ZSTD_LITFREQ_ADD; - for (u = 0; u <= MaxOff; u++) { - ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u] >> ZSTD_FREQ_DIV); - ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; - } - } - - ZSTD_setLog2Prices(ssPtr); -} - -FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t *ssPtr, U32 litLength, const BYTE *literals) -{ - U32 price, u; - - if (ssPtr->staticPrices) - return ZSTD_highbit32((U32)litLength + 1) + (litLength * 6); - - if (litLength == 0) - return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0] + 1); - - /* literals */ - if (ssPtr->cachedLiterals == literals) { - U32 const additional = litLength - ssPtr->cachedLitLength; - const BYTE *literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength; - price = ssPtr->cachedPrice + additional * ssPtr->log2litSum; - for (u = 0; u < additional; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]] + 1); - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } else { - price = litLength * ssPtr->log2litSum; - for (u = 0; u < litLength; u++) - price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]] + 1); - - if (litLength >= 12) { - ssPtr->cachedLiterals = literals; - ssPtr->cachedPrice = price; - ssPtr->cachedLitLength = litLength; - } - } - - /* literal Length */ - { - const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode] + 1); - } - - return price; -} - -FORCE_INLINE U32 ZSTD_getPrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength, const int ultra) -{ - /* offset */ - U32 price; - BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); - - if (seqStorePtr->staticPrices) - return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength + 1) + 16 + offCode; - - price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode] + 1); - if (!ultra && offCode >= 20) - price += (offCode - 19) * 2; - - /* match Length */ - { - const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode] + 1); - } - - return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor; -} - -ZSTD_STATIC void ZSTD_updatePrice(seqStore_t *seqStorePtr, U32 litLength, const BYTE *literals, U32 offset, U32 matchLength) -{ - U32 u; - - /* literals */ - seqStorePtr->litSum += litLength * ZSTD_LITFREQ_ADD; - for (u = 0; u < litLength; u++) - seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; - - /* literal Length */ - { - const BYTE LL_deltaCode = 19; - const BYTE llCode = (litLength > 63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength]; - seqStorePtr->litLengthFreq[llCode]++; - seqStorePtr->litLengthSum++; - } - - /* match offset */ - { - BYTE const offCode = (BYTE)ZSTD_highbit32(offset + 1); - seqStorePtr->offCodeSum++; - seqStorePtr->offCodeFreq[offCode]++; - } - - /* match Length */ - { - const BYTE ML_deltaCode = 36; - const BYTE mlCode = (matchLength > 127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength]; - seqStorePtr->matchLengthFreq[mlCode]++; - seqStorePtr->matchLengthSum++; - } - - ZSTD_setLog2Prices(seqStorePtr); -} - -#define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \ - { \ - while (last_pos < pos) { \ - opt[last_pos + 1].price = ZSTD_MAX_PRICE; \ - last_pos++; \ - } \ - opt[pos].mlen = mlen_; \ - opt[pos].off = offset_; \ - opt[pos].litlen = litlen_; \ - opt[pos].price = price_; \ - } - -/* Update hashTable3 up to ip (excluded) - Assumption : always within prefix (i.e. not within extDict) */ -FORCE_INLINE -U32 ZSTD_insertAndFindFirstIndexHash3(ZSTD_CCtx *zc, const BYTE *ip) -{ - U32 *const hashTable3 = zc->hashTable3; - U32 const hashLog3 = zc->hashLog3; - const BYTE *const base = zc->base; - U32 idx = zc->nextToUpdate3; - const U32 target = zc->nextToUpdate3 = (U32)(ip - base); - const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3); - - while (idx < target) { - hashTable3[ZSTD_hash3Ptr(base + idx, hashLog3)] = idx; - idx++; - } - - return hashTable3[hash3]; -} - -/*-************************************* -* Binary Tree search -***************************************/ -static U32 ZSTD_insertBtAndGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, U32 nbCompares, const U32 mls, U32 extDict, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - const BYTE *const base = zc->base; - const U32 curr = (U32)(ip - base); - const U32 hashLog = zc->params.cParams.hashLog; - const size_t h = ZSTD_hashPtr(ip, hashLog, mls); - U32 *const hashTable = zc->hashTable; - U32 matchIndex = hashTable[h]; - U32 *const bt = zc->chainTable; - const U32 btLog = zc->params.cParams.chainLog - 1; - const U32 btMask = (1U << btLog) - 1; - size_t commonLengthSmaller = 0, commonLengthLarger = 0; - const BYTE *const dictBase = zc->dictBase; - const U32 dictLimit = zc->dictLimit; - const BYTE *const dictEnd = dictBase + dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const U32 btLow = btMask >= curr ? 0 : curr - btMask; - const U32 windowLow = zc->lowLimit; - U32 *smallerPtr = bt + 2 * (curr & btMask); - U32 *largerPtr = bt + 2 * (curr & btMask) + 1; - U32 matchEndIdx = curr + 8; - U32 dummy32; /* to be nullified at the end */ - U32 mnum = 0; - - const U32 minMatch = (mls == 3) ? 3 : 4; - size_t bestLength = minMatchLen - 1; - - if (minMatch == 3) { /* HC3 match finder */ - U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(zc, ip); - if (matchIndex3 > windowLow && (curr - matchIndex3 < (1 << 18))) { - const BYTE *match; - size_t currMl = 0; - if ((!extDict) || matchIndex3 >= dictLimit) { - match = base + matchIndex3; - if (match[bestLength] == ip[bestLength]) - currMl = ZSTD_count(ip, match, iLimit); - } else { - match = dictBase + matchIndex3; - if (ZSTD_readMINMATCH(match, MINMATCH) == - ZSTD_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */ - currMl = ZSTD_count_2segments(ip + MINMATCH, match + MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH; - } - - /* save best solution */ - if (currMl > bestLength) { - bestLength = currMl; - matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex3; - matches[mnum].len = (U32)currMl; - mnum++; - if (currMl > ZSTD_OPT_NUM) - goto update; - if (ip + currMl == iLimit) - goto update; /* best possible, and avoid read overflow*/ - } - } - } - - hashTable[h] = curr; /* Update Hash Table */ - - while (nbCompares-- && (matchIndex > windowLow)) { - U32 *nextPtr = bt + 2 * (matchIndex & btMask); - size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ - const BYTE *match; - - if ((!extDict) || (matchIndex + matchLength >= dictLimit)) { - match = base + matchIndex; - if (match[matchLength] == ip[matchLength]) { - matchLength += ZSTD_count(ip + matchLength + 1, match + matchLength + 1, iLimit) + 1; - } - } else { - match = dictBase + matchIndex; - matchLength += ZSTD_count_2segments(ip + matchLength, match + matchLength, iLimit, dictEnd, prefixStart); - if (matchIndex + matchLength >= dictLimit) - match = base + matchIndex; /* to prepare for next usage of match[matchLength] */ - } - - if (matchLength > bestLength) { - if (matchLength > matchEndIdx - matchIndex) - matchEndIdx = matchIndex + (U32)matchLength; - bestLength = matchLength; - matches[mnum].off = ZSTD_REP_MOVE_OPT + curr - matchIndex; - matches[mnum].len = (U32)matchLength; - mnum++; - if (matchLength > ZSTD_OPT_NUM) - break; - if (ip + matchLength == iLimit) /* equal : no way to know if inf or sup */ - break; /* drop, to guarantee consistency (miss a little bit of compression) */ - } - - if (match[matchLength] < ip[matchLength]) { - /* match is smaller than curr */ - *smallerPtr = matchIndex; /* update smaller idx */ - commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */ - if (matchIndex <= btLow) { - smallerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - smallerPtr = nextPtr + 1; /* new "smaller" => larger of match */ - matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to curr) */ - } else { - /* match is larger than curr */ - *largerPtr = matchIndex; - commonLengthLarger = matchLength; - if (matchIndex <= btLow) { - largerPtr = &dummy32; - break; - } /* beyond tree size, stop the search */ - largerPtr = nextPtr; - matchIndex = nextPtr[0]; - } - } - - *smallerPtr = *largerPtr = 0; - -update: - zc->nextToUpdate = (matchEndIdx > curr + 8) ? matchEndIdx - 8 : curr + 1; - return mnum; -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, ZSTD_match_t *matches, - const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen); -} - -static U32 ZSTD_BtGetAllMatches_selectMLS(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - switch (matchLengthSearch) { - case 3: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default: - case 4: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7: - case 6: return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/** Tree updater, providing best match */ -static U32 ZSTD_BtGetAllMatches_extDict(ZSTD_CCtx *zc, const BYTE *const ip, const BYTE *const iLimit, const U32 maxNbAttempts, const U32 mls, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - if (ip < zc->base + zc->nextToUpdate) - return 0; /* skipped area */ - ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls); - return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen); -} - -static U32 ZSTD_BtGetAllMatches_selectMLS_extDict(ZSTD_CCtx *zc, /* Index table will be updated */ - const BYTE *ip, const BYTE *const iHighLimit, const U32 maxNbAttempts, const U32 matchLengthSearch, - ZSTD_match_t *matches, const U32 minMatchLen) -{ - switch (matchLengthSearch) { - case 3: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen); - default: - case 4: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen); - case 5: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen); - case 7: - case 6: return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen); - } -} - -/*-******************************* -* Optimal parser -*********************************/ -FORCE_INLINE -void ZSTD_compressBlock_opt_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const BYTE *const prefixStart = base + ctx->dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t *opt = seqStorePtr->priceTable; - ZSTD_match_t *matches = seqStorePtr->matchTable; - const BYTE *inr; - U32 offset, rep[ZSTD_REP_NUM]; - - /* init */ - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); - ip += (ip == prefixStart); - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - rep[i] = ctx->rep[i]; - } - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - litlen = (U32)(ip - anchor); - - /* check repCode */ - { - U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); - for (i = (ip == anchor); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - if ((repCur > 0) && (repCur < (S32)(ip - prefixStart)) && - (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(ip - repCur, minMatch))) { - mlen = (U32)ZSTD_count(ip + minMatch, ip + minMatch - repCur, iend) + minMatch; - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - best_off = i - (ip == anchor); - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch); - - if (!last_pos && !match_num) { - ip++; - continue; - } - - if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - /* set prices using matches at position = 0 */ - best_mlen = (last_pos) ? last_pos : minMatch; - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */ - mlen++; - } - } - - if (last_pos < minMatch) { - ip++; - continue; - } - - /* initialize opt[0] */ - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - opt[0].rep[i] = rep[i]; - } - opt[0].mlen = 1; - opt[0].litlen = litlen; - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur - 1].mlen == 1) { - litlen = opt[cur - 1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) - break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur - mlen].rep[1]; - opt[cur].rep[1] = opt[cur - mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; - opt[cur].rep[0] = - ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { - U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (opt[cur].mlen != 1); i < last_i; i++) { /* check rep */ - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; - if ((repCur > 0) && (repCur < (S32)(inr - prefixStart)) && - (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(inr - repCur, minMatch))) { - mlen = (U32)ZSTD_count(inr + minMatch, inr + minMatch - repCur, iend) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) - best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, - best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen); - - if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, - matches[u].off - 1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } - } - } - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) - break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur = 0; cur < last_pos;) { - mlen = opt[cur].mlen; - if (mlen == 1) { - ip++; - cur++; - continue; - } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - if (litLength == 0) - offset--; - } - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - anchor = ip = ip + mlen; - } - } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - ctx->repToConfirm[i] = rep[i]; - } - - /* Last Literals */ - { - size_t const lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -FORCE_INLINE -void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const int ultra) -{ - seqStore_t *seqStorePtr = &(ctx->seqStore); - const BYTE *const istart = (const BYTE *)src; - const BYTE *ip = istart; - const BYTE *anchor = istart; - const BYTE *const iend = istart + srcSize; - const BYTE *const ilimit = iend - 8; - const BYTE *const base = ctx->base; - const U32 lowestIndex = ctx->lowLimit; - const U32 dictLimit = ctx->dictLimit; - const BYTE *const prefixStart = base + dictLimit; - const BYTE *const dictBase = ctx->dictBase; - const BYTE *const dictEnd = dictBase + dictLimit; - - const U32 maxSearches = 1U << ctx->params.cParams.searchLog; - const U32 sufficient_len = ctx->params.cParams.targetLength; - const U32 mls = ctx->params.cParams.searchLength; - const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4; - - ZSTD_optimal_t *opt = seqStorePtr->priceTable; - ZSTD_match_t *matches = seqStorePtr->matchTable; - const BYTE *inr; - - /* init */ - U32 offset, rep[ZSTD_REP_NUM]; - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - rep[i] = ctx->rep[i]; - } - - ctx->nextToUpdate3 = ctx->nextToUpdate; - ZSTD_rescaleFreqs(seqStorePtr, (const BYTE *)src, srcSize); - ip += (ip == prefixStart); - - /* Match Loop */ - while (ip < ilimit) { - U32 cur, match_num, last_pos, litlen, price; - U32 u, mlen, best_mlen, best_off, litLength; - U32 curr = (U32)(ip - base); - memset(opt, 0, sizeof(ZSTD_optimal_t)); - last_pos = 0; - opt[0].litlen = (U32)(ip - anchor); - - /* check repCode */ - { - U32 i, last_i = ZSTD_REP_CHECK + (ip == anchor); - for (i = (ip == anchor); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; - const U32 repIndex = (U32)(curr - repCur); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if ((repCur > 0 && repCur <= (S32)curr) && - (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { - /* repcode detected we should take it */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(ip + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_off = i - (ip == anchor); - litlen = opt[0].litlen; - do { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */ - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */ - - if (!last_pos && !match_num) { - ip++; - continue; - } - - { - U32 i; - for (i = 0; i < ZSTD_REP_NUM; i++) - opt[0].rep[i] = rep[i]; - } - opt[0].mlen = 1; - - if (match_num && (matches[match_num - 1].len > sufficient_len || matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - cur = 0; - last_pos = 1; - goto _storeSequence; - } - - best_mlen = (last_pos) ? last_pos : minMatch; - - /* set prices using matches at position = 0 */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - litlen = opt[0].litlen; - while (mlen <= best_mlen) { - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - if (mlen > last_pos || price < opt[mlen].price) - SET_PRICE(mlen, mlen, matches[u].off, litlen, price); - mlen++; - } - } - - if (last_pos < minMatch) { - ip++; - continue; - } - - /* check further positions */ - for (cur = 1; cur <= last_pos; cur++) { - inr = ip + cur; - - if (opt[cur - 1].mlen == 1) { - litlen = opt[cur - 1].litlen + 1; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - litlen); - } else - price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor); - } else { - litlen = 1; - price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr - 1); - } - - if (cur > last_pos || price <= opt[cur].price) - SET_PRICE(cur, 1, 0, litlen, price); - - if (cur == last_pos) - break; - - if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */ - continue; - - mlen = opt[cur].mlen; - if (opt[cur].off > ZSTD_REP_MOVE_OPT) { - opt[cur].rep[2] = opt[cur - mlen].rep[1]; - opt[cur].rep[1] = opt[cur - mlen].rep[0]; - opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT; - } else { - opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur - mlen].rep[1] : opt[cur - mlen].rep[2]; - opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur - mlen].rep[0] : opt[cur - mlen].rep[1]; - opt[cur].rep[0] = - ((opt[cur].off == ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur - mlen].rep[0] - 1) : (opt[cur - mlen].rep[opt[cur].off]); - } - - best_mlen = minMatch; - { - U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); - for (i = (mlen != 1); i < last_i; i++) { - const S32 repCur = (i == ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; - const U32 repIndex = (U32)(curr + cur - repCur); - const BYTE *const repBase = repIndex < dictLimit ? dictBase : base; - const BYTE *const repMatch = repBase + repIndex; - if ((repCur > 0 && repCur <= (S32)(curr + cur)) && - (((U32)((dictLimit - 1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */ - && (ZSTD_readMINMATCH(inr, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch))) { - /* repcode detected */ - const BYTE *const repEnd = repIndex < dictLimit ? dictEnd : iend; - mlen = (U32)ZSTD_count_2segments(inr + minMatch, repMatch + minMatch, iend, repEnd, prefixStart) + minMatch; - - if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) { - best_mlen = mlen; - best_off = i; - last_pos = cur + 1; - goto _storeSequence; - } - - best_off = i - (opt[cur].mlen != 1); - if (mlen > best_mlen) - best_mlen = mlen; - - do { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) { - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr - litlen, - best_off, mlen - MINMATCH, ultra); - } else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || price <= opt[cur + mlen].price) - SET_PRICE(cur + mlen, mlen, i, litlen, price); - mlen--; - } while (mlen >= minMatch); - } - } - } - - match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch); - - if (match_num > 0 && (matches[match_num - 1].len > sufficient_len || cur + matches[match_num - 1].len >= ZSTD_OPT_NUM)) { - best_mlen = matches[match_num - 1].len; - best_off = matches[match_num - 1].off; - last_pos = cur + 1; - goto _storeSequence; - } - - /* set prices using matches at position = cur */ - for (u = 0; u < match_num; u++) { - mlen = (u > 0) ? matches[u - 1].len + 1 : best_mlen; - best_mlen = matches[u].len; - - while (mlen <= best_mlen) { - if (opt[cur].mlen == 1) { - litlen = opt[cur].litlen; - if (cur > litlen) - price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip + cur - litlen, - matches[u].off - 1, mlen - MINMATCH, ultra); - else - price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off - 1, mlen - MINMATCH, ultra); - } else { - litlen = 0; - price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off - 1, mlen - MINMATCH, ultra); - } - - if (cur + mlen > last_pos || (price < opt[cur + mlen].price)) - SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price); - - mlen++; - } - } - } /* for (cur = 1; cur <= last_pos; cur++) */ - - best_mlen = opt[last_pos].mlen; - best_off = opt[last_pos].off; - cur = last_pos - best_mlen; - - /* store sequence */ -_storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */ - opt[0].mlen = 1; - - while (1) { - mlen = opt[cur].mlen; - offset = opt[cur].off; - opt[cur].mlen = best_mlen; - opt[cur].off = best_off; - best_mlen = mlen; - best_off = offset; - if (mlen > cur) - break; - cur -= mlen; - } - - for (u = 0; u <= last_pos;) { - u += opt[u].mlen; - } - - for (cur = 0; cur < last_pos;) { - mlen = opt[cur].mlen; - if (mlen == 1) { - ip++; - cur++; - continue; - } - offset = opt[cur].off; - cur += mlen; - litLength = (U32)(ip - anchor); - - if (offset > ZSTD_REP_MOVE_OPT) { - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = offset - ZSTD_REP_MOVE_OPT; - offset--; - } else { - if (offset != 0) { - best_off = (offset == ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); - if (offset != 1) - rep[2] = rep[1]; - rep[1] = rep[0]; - rep[0] = best_off; - } - - if (litLength == 0) - offset--; - } - - ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen - MINMATCH); - anchor = ip = ip + mlen; - } - } /* for (cur=0; cur < last_pos; ) */ - - /* Save reps for next block */ - { - int i; - for (i = 0; i < ZSTD_REP_NUM; i++) - ctx->repToConfirm[i] = rep[i]; - } - - /* Last Literals */ - { - size_t lastLLSize = iend - anchor; - memcpy(seqStorePtr->lit, anchor, lastLLSize); - seqStorePtr->lit += lastLLSize; - } -} - -#endif /* ZSTD_OPT_H_91842398743 */ diff --git a/mm/Kconfig b/mm/Kconfig index f730605b8dcf..b5e0e9195e91 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -63,6 +63,43 @@ config SPARSEMEM_MANUAL endchoice +config UNEVICTABLE_FILE + bool "Keep some file pages under memory pressure" + depends on SYSCTL + def_bool y + help + Keep some file pages still mapped under memory pressure + to avoid potential disk thrashing that may occur due to + evicting running executables code. + + The UNEVICTABLE_FILE_KBYTES_LOW value defines a threshold + to activate file pages eviction throttling. + The vm.unevictable_file_kbytes_low sysctl knob is used to + change the amount in the runtime (setting it to 0 + effectively disables this feature). + + Recommended value: 262144 for a typical desktop workload. + + The UNEVICTABLE_FILE_KBYTES_MIN value sets the amount of + pages to keep as a hard limit. + The vm.unevictable_file_kbytes_min sysctl knob is used to + change the amount in the runtime (setting it to 0 + effectively disables this feature). + + Recommended value: 131072 for a typical desktop workload. + + See also: Documentation/admin-guide/sysctl/vm.rst + +config UNEVICTABLE_FILE_KBYTES_LOW + int "Default value for vm.unevictable_file_kbytes_low" + depends on UNEVICTABLE_FILE + default "262144" + +config UNEVICTABLE_FILE_KBYTES_MIN + int "Default value for vm.unevictable_file_kbytes_min" + depends on UNEVICTABLE_FILE + default "131072" + config DISCONTIGMEM def_bool y depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL diff --git a/mm/compaction.c b/mm/compaction.c index 190ccdaa6c19..5339e9294d62 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -1288,7 +1288,7 @@ static void fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated) { unsigned long start_pfn, end_pfn; - struct page *page = pfn_to_page(pfn); + struct page *page; /* Do not search around if there are enough pages already */ if (cc->nr_freepages >= cc->nr_migratepages) @@ -1299,8 +1299,12 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long return; /* Pageblock boundaries */ - start_pfn = pageblock_start_pfn(pfn); - end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1; + start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn); + end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)); + + page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone); + if (!page) + return; /* Scan before */ if (start_pfn != pfn) { @@ -1402,7 +1406,8 @@ fast_isolate_freepages(struct compact_control *cc) pfn = page_to_pfn(freepage); if (pfn >= highest) - highest = pageblock_start_pfn(pfn); + highest = max(pageblock_start_pfn(pfn), + cc->zone->zone_start_pfn); if (pfn >= low_pfn) { cc->fast_search_fail = 0; @@ -1472,7 +1477,8 @@ fast_isolate_freepages(struct compact_control *cc) } else { if (cc->direct_compaction && pfn_valid(min_pfn)) { page = pageblock_pfn_to_page(min_pfn, - pageblock_end_pfn(min_pfn), + min(pageblock_end_pfn(min_pfn), + zone_end_pfn(cc->zone)), cc->zone); cc->free_pfn = min_pfn; } @@ -1702,6 +1708,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) unsigned long pfn = cc->migrate_pfn; unsigned long high_pfn; int order; + bool found_block = false; /* Skip hints are relied on to avoid repeats on the fast search */ if (cc->ignore_skip_hint) @@ -1744,7 +1751,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance); for (order = cc->order - 1; - order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit; + order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit; order--) { struct free_area *area = &cc->zone->free_area[order]; struct list_head *freelist; @@ -1759,7 +1766,11 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) list_for_each_entry(freepage, freelist, lru) { unsigned long free_pfn; - nr_scanned++; + if (nr_scanned++ >= limit) { + move_freelist_tail(freelist, freepage); + break; + } + free_pfn = page_to_pfn(freepage); if (free_pfn < high_pfn) { /* @@ -1768,12 +1779,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) * the list assumes an entry is deleted, not * reordered. */ - if (get_pageblock_skip(freepage)) { - if (list_is_last(freelist, &freepage->lru)) - break; - + if (get_pageblock_skip(freepage)) continue; - } /* Reorder to so a future search skips recent pages */ move_freelist_tail(freelist, freepage); @@ -1781,15 +1788,10 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) update_fast_start_pfn(cc, free_pfn); pfn = pageblock_start_pfn(free_pfn); cc->fast_search_fail = 0; + found_block = true; set_pageblock_skip(freepage); break; } - - if (nr_scanned >= limit) { - cc->fast_search_fail++; - move_freelist_tail(freelist, freepage); - break; - } } spin_unlock_irqrestore(&cc->zone->lock, flags); } @@ -1800,9 +1802,10 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc) * If fast scanning failed then use a cached entry for a page block * that had free pages as the basis for starting a linear scan. */ - if (pfn == cc->migrate_pfn) + if (!found_block) { + cc->fast_search_fail++; pfn = reinit_migrate_pfn(cc); - + } return pfn; } @@ -1926,20 +1929,28 @@ static bool kswapd_is_running(pg_data_t *pgdat) /* * A zone's fragmentation score is the external fragmentation wrt to the - * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value - * in the range [0, 100]. + * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100]. + */ +static unsigned int fragmentation_score_zone(struct zone *zone) +{ + return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); +} + +/* + * A weighted zone's fragmentation score is the external fragmentation + * wrt to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It + * returns a value in the range [0, 100]. * * The scaling factor ensures that proactive compaction focuses on larger * zones like ZONE_NORMAL, rather than smaller, specialized zones like * ZONE_DMA32. For smaller zones, the score value remains close to zero, * and thus never exceeds the high threshold for proactive compaction. */ -static unsigned int fragmentation_score_zone(struct zone *zone) +static unsigned int fragmentation_score_zone_weighted(struct zone *zone) { unsigned long score; - score = zone->present_pages * - extfrag_for_order(zone, COMPACTION_HPAGE_ORDER); + score = zone->present_pages * fragmentation_score_zone(zone); return div64_ul(score, zone->zone_pgdat->node_present_pages + 1); } @@ -1959,7 +1970,7 @@ static unsigned int fragmentation_score_node(pg_data_t *pgdat) struct zone *zone; zone = &pgdat->node_zones[zoneid]; - score += fragmentation_score_zone(zone); + score += fragmentation_score_zone_weighted(zone); } return score; diff --git a/mm/highmem.c b/mm/highmem.c index 874b732b120c..6ef8f5e05e7e 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, BUG_ON(end1 > page_size(page) || end2 > page_size(page)); + if (start1 >= end1) + start1 = end1 = 0; + if (start2 >= end2) + start2 = end2 = 0; + for (i = 0; i < compound_nr(page); i++) { void *kaddr = NULL; - if (start1 < PAGE_SIZE || start2 < PAGE_SIZE) - kaddr = kmap_atomic(page + i); - if (start1 >= PAGE_SIZE) { start1 -= PAGE_SIZE; end1 -= PAGE_SIZE; } else { unsigned this_end = min_t(unsigned, end1, PAGE_SIZE); - if (end1 > start1) + if (end1 > start1) { + kaddr = kmap_atomic(page + i); memset(kaddr + start1, 0, this_end - start1); + } end1 -= this_end; start1 = 0; } @@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1, } else { unsigned this_end = min_t(unsigned, end2, PAGE_SIZE); - if (end2 > start2) + if (end2 > start2) { + if (!kaddr) + kaddr = kmap_atomic(page + i); memset(kaddr + start2, 0, this_end - start2); + } end2 -= this_end; start2 = 0; } @@ -611,7 +618,7 @@ void __kmap_local_sched_out(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } @@ -647,7 +654,7 @@ void __kmap_local_sched_in(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 91ca9b103ee5..f3affe860e2b 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2465,7 +2465,7 @@ static void __split_huge_page(struct page *page, struct list_head *list, int i; /* complete memcg works before add pages to LRU */ - mem_cgroup_split_huge_fixup(head); + split_page_memcg(head, nr); if (PageAnon(head) && PageSwapCache(head)) { swp_entry_t entry = { .val = page_private(head) }; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 4bdb58ab14cb..8e89b277ffcc 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -285,6 +285,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, nrg->reservation_counter = &h_cg->rsvd_hugepage[hstate_index(h)]; nrg->css = &h_cg->css; + /* + * The caller will hold exactly one h_cg->css reference for the + * whole contiguous reservation region. But this area might be + * scattered when there are already some file_regions reside in + * it. As a result, many file_regions may share only one css + * reference. In order to ensure that one file_region must hold + * exactly one h_cg->css reference, we should do css_get for + * each file_region and leave the reference held by caller + * untouched. + */ + css_get(&h_cg->css); if (!resv->pages_per_hpage) resv->pages_per_hpage = pages_per_huge_page(h); /* pages_per_hpage should be the same for all entries in @@ -298,6 +309,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, #endif } +static void put_uncharge_info(struct file_region *rg) +{ +#ifdef CONFIG_CGROUP_HUGETLB + if (rg->css) + css_put(rg->css); +#endif +} + static bool has_same_uncharge_info(struct file_region *rg, struct file_region *org) { @@ -321,6 +340,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) prg->to = rg->to; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); rg = prg; @@ -332,6 +352,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) nrg->from = rg->from; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); } } @@ -664,7 +685,7 @@ static long region_del(struct resv_map *resv, long f, long t) del += t - f; hugetlb_cgroup_uncharge_file_region( - resv, rg, t - f); + resv, rg, t - f, false); /* New entry for end of split region */ nrg->from = t; @@ -685,7 +706,7 @@ static long region_del(struct resv_map *resv, long f, long t) if (f <= rg->from && t >= rg->to) { /* Remove entire region */ del += rg->to - rg->from; hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - rg->from); + rg->to - rg->from, true); list_del(&rg->link); kfree(rg); continue; @@ -693,13 +714,13 @@ static long region_del(struct resv_map *resv, long f, long t) if (f <= rg->from) { /* Trim beginning of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - t - rg->from); + t - rg->from, false); del += t - rg->from; rg->from = t; } else { /* Trim end of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - f); + rg->to - f, false); del += rg->to - f; rg->to = f; @@ -1312,14 +1333,16 @@ static inline void destroy_compound_gigantic_page(struct page *page, static void update_and_free_page(struct hstate *h, struct page *page) { int i; + struct page *subpage = page; if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported()) return; h->nr_huge_pages--; h->nr_huge_pages_node[page_to_nid(page)]--; - for (i = 0; i < pages_per_huge_page(h); i++) { - page[i].flags &= ~(1 << PG_locked | 1 << PG_error | + for (i = 0; i < pages_per_huge_page(h); + i++, subpage = mem_map_next(subpage, page, i)) { + subpage->flags &= ~(1 << PG_locked | 1 << PG_error | 1 << PG_referenced | 1 << PG_dirty | 1 << PG_active | 1 << PG_private | 1 << PG_writeback); @@ -2520,7 +2543,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) if (hstate_is_gigantic(h)) { if (hugetlb_cma_size) { pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n"); - break; + goto free; } if (!alloc_bootmem_huge_page(h)) break; @@ -2538,7 +2561,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h) h->max_huge_pages, buf, i); h->max_huge_pages = i; } - +free: kfree(node_alloc_noretry); } @@ -2988,8 +3011,10 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent, return -ENOMEM; retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group); - if (retval) + if (retval) { kobject_put(hstate_kobjs[hi]); + hstate_kobjs[hi] = NULL; + } return retval; } @@ -5187,6 +5212,10 @@ int hugetlb_reserve_pages(struct inode *inode, */ long rsv_adjust; + /* + * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the + * reference to h_cg->css. See comment below for detail. + */ hugetlb_cgroup_uncharge_cgroup_rsvd( hstate_index(h), (chg - add) * pages_per_huge_page(h), h_cg); @@ -5194,6 +5223,14 @@ int hugetlb_reserve_pages(struct inode *inode, rsv_adjust = hugepage_subpool_put_pages(spool, chg - add); hugetlb_acct_memory(h, -rsv_adjust); + } else if (h_cg) { + /* + * The file_regions will hold their own reference to + * h_cg->css. So we should release the reference held + * via hugetlb_cgroup_charge_cgroup_rsvd() when we are + * done. + */ + hugetlb_cgroup_put_rsvd_cgroup(h_cg); } } return 0; @@ -5300,21 +5337,23 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { - unsigned long a_start, a_end; + unsigned long v_start = ALIGN(vma->vm_start, PUD_SIZE), + v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE); - if (!(vma->vm_flags & VM_MAYSHARE)) + /* + * vma need span at least one aligned PUD size and the start,end range + * must at least partialy within it. + */ + if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) || + (*end <= v_start) || (*start >= v_end)) return; /* Extend the range to be PUD aligned for a worst case scenario */ - a_start = ALIGN_DOWN(*start, PUD_SIZE); - a_end = ALIGN(*end, PUD_SIZE); + if (*start > v_start) + *start = ALIGN_DOWN(*start, PUD_SIZE); - /* - * Intersect the range with the vma range, since pmd sharing won't be - * across vma after all - */ - *start = max(vma->vm_start, a_start); - *end = min(vma->vm_end, a_end); + if (*end < v_end) + *end = ALIGN(*end, PUD_SIZE); } /* diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index 9182848dda3e..1348819f546c 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages) + unsigned long nr_pages, + bool region_del) { if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) return; @@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, !resv->reservation_counter) { page_counter_uncharge(rg->reservation_counter, nr_pages * resv->pages_per_hpage); - css_put(rg->css); + /* + * Only do css_put(rg->css) when we delete the entire region + * because one file_region must hold exactly one css reference. + */ + if (region_del) + css_put(rg->css); } } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 67ab391a5373..494d3cb0b58a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -442,18 +442,28 @@ static inline int khugepaged_test_exit(struct mm_struct *mm) static bool hugepage_vma_check(struct vm_area_struct *vma, unsigned long vm_flags) { - if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) || - (vm_flags & VM_NOHUGEPAGE) || + /* Explicitly disabled through madvise. */ + if ((vm_flags & VM_NOHUGEPAGE) || test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)) return false; - if (shmem_file(vma->vm_file) || - (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && - vma->vm_file && - (vm_flags & VM_DENYWRITE))) { + /* Enabled via shmem mount options or sysfs settings. */ + if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) { return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, HPAGE_PMD_NR); } + + /* THP settings require madvise. */ + if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) + return false; + + /* Read-only file mappings need to be aligned for THP to work. */ + if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file && + (vm_flags & VM_DENYWRITE)) { + return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff, + HPAGE_PMD_NR); + } + if (!vma->anon_vma || vma->vm_ops) return false; if (vma_is_temporary_stack(vma)) diff --git a/mm/ksm.c b/mm/ksm.c index 9694ee2c71de..61fa1a92965f 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -2434,54 +2434,78 @@ static int ksm_scan_thread(void *nothing) return 0; } -int ksm_madvise(struct vm_area_struct *vma, unsigned long start, - unsigned long end, int advice, unsigned long *vm_flags) +int ksm_madvise_merge(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *vm_flags) { - struct mm_struct *mm = vma->vm_mm; int err; - switch (advice) { - case MADV_MERGEABLE: - /* - * Be somewhat over-protective for now! - */ - if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | - VM_PFNMAP | VM_IO | VM_DONTEXPAND | - VM_HUGETLB | VM_MIXEDMAP)) - return 0; /* just ignore the advice */ + /* + * Be somewhat over-protective for now! + */ + if (*vm_flags & (VM_MERGEABLE | VM_SHARED | VM_MAYSHARE | + VM_PFNMAP | VM_IO | VM_DONTEXPAND | + VM_HUGETLB | VM_MIXEDMAP)) + return 0; /* just ignore the advice */ - if (vma_is_dax(vma)) - return 0; + if (vma_is_dax(vma)) + return 0; #ifdef VM_SAO if (*vm_flags & VM_SAO) return 0; #endif #ifdef VM_SPARC_ADI - if (*vm_flags & VM_SPARC_ADI) - return 0; + if (*vm_flags & VM_SPARC_ADI) + return 0; #endif - if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { - err = __ksm_enter(mm); - if (err) - return err; - } + if (!test_bit(MMF_VM_MERGEABLE, &mm->flags)) { + err = __ksm_enter(mm); + if (err) + return err; + } - *vm_flags |= VM_MERGEABLE; - break; + *vm_flags |= VM_MERGEABLE; - case MADV_UNMERGEABLE: - if (!(*vm_flags & VM_MERGEABLE)) - return 0; /* just ignore the advice */ + return 0; +} - if (vma->anon_vma) { - err = unmerge_ksm_pages(vma, start, end); - if (err) - return err; - } +int ksm_madvise_unmerge(struct vm_area_struct *vma, unsigned long start, + unsigned long end, unsigned long *vm_flags) +{ + int err; + + if (!(*vm_flags & VM_MERGEABLE)) + return 0; /* just ignore the advice */ + + if (vma->anon_vma) { + err = unmerge_ksm_pages(vma, start, end); + if (err) + return err; + } - *vm_flags &= ~VM_MERGEABLE; + *vm_flags &= ~VM_MERGEABLE; + + return 0; +} + +int ksm_madvise(struct vm_area_struct *vma, unsigned long start, + unsigned long end, int advice, unsigned long *vm_flags) +{ + struct mm_struct *mm = vma->vm_mm; + int err; + + switch (advice) { + case MADV_MERGEABLE: + err = ksm_madvise_merge(mm, vma, vm_flags); + if (err) + return err; + break; + + case MADV_UNMERGEABLE: + err = ksm_madvise_unmerge(vma, start, end, vm_flags); + if (err) + return err; break; } diff --git a/mm/madvise.c b/mm/madvise.c index 6a660858784b..a9bcd16b5d95 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -1197,12 +1197,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, goto release_task; } - mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS); + /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */ + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) { ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; goto release_task; } + /* + * Require CAP_SYS_NICE for influencing process performance. Note that + * only non-destructive hints are currently supported. + */ + if (!capable(CAP_SYS_NICE)) { + ret = -EPERM; + goto release_mm; + } + total_len = iov_iter_count(&iter); while (iov_iter_count(&iter)) { @@ -1217,6 +1227,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec, if (ret == 0) ret = total_len - iov_iter_count(&iter); +release_mm: mmput(mm); release_task: put_task_struct(task); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 913c2b9e5c72..aa9b9536649a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1080,13 +1080,9 @@ static __always_inline struct mem_cgroup *get_active_memcg(void) rcu_read_lock(); memcg = active_memcg(); - if (memcg) { - /* current->active_memcg must hold a ref. */ - if (WARN_ON_ONCE(!css_tryget(&memcg->css))) - memcg = root_mem_cgroup; - else - memcg = current->active_memcg; - } + /* remote memcg must hold a ref. */ + if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css))) + memcg = root_mem_cgroup; rcu_read_unlock(); return memcg; @@ -3300,24 +3296,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size) #endif /* CONFIG_MEMCG_KMEM */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE /* - * Because page_memcg(head) is not set on compound tails, set it now. + * Because page_memcg(head) is not set on tails, set it now. */ -void mem_cgroup_split_huge_fixup(struct page *head) +void split_page_memcg(struct page *head, unsigned int nr) { struct mem_cgroup *memcg = page_memcg(head); int i; - if (mem_cgroup_disabled()) + if (mem_cgroup_disabled() || !memcg) return; - for (i = 1; i < HPAGE_PMD_NR; i++) { - css_get(&memcg->css); - head[i].memcg_data = (unsigned long)memcg; - } + for (i = 1; i < nr; i++) + head[i].memcg_data = head->memcg_data; + css_get_many(&memcg->css, nr - 1); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_MEMCG_SWAP /** @@ -5637,10 +5630,8 @@ static int mem_cgroup_move_account(struct page *page, __mod_lruvec_state(from_vec, NR_ANON_MAPPED, -nr_pages); __mod_lruvec_state(to_vec, NR_ANON_MAPPED, nr_pages); if (PageTransHuge(page)) { - __mod_lruvec_state(from_vec, NR_ANON_THPS, - -nr_pages); - __mod_lruvec_state(to_vec, NR_ANON_THPS, - nr_pages); + __dec_lruvec_state(from_vec, NR_ANON_THPS); + __inc_lruvec_state(to_vec, NR_ANON_THPS); } } @@ -6760,7 +6751,19 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) memcg_check_events(memcg, page); local_irq_enable(); - if (PageSwapCache(page)) { + /* + * Cgroup1's unified memory+swap counter has been charged with the + * new swapcache page, finish the transfer by uncharging the swap + * slot. The swap slot would also get uncharged when it dies, but + * it can stick around indefinitely and we'd count the page twice + * the entire time. + * + * Cgroup2 has separate resource counters for memory and swap, + * so this is a non-issue here. Memory and swap charge lifetimes + * correspond 1:1 to page and swap slot lifetimes: we charge the + * page to memory here, and uncharge swap when the slot is freed. + */ + if (do_memsw_account() && PageSwapCache(page)) { swp_entry_t entry = { .val = page_private(page) }; /* * The swap entry might not get freed for a long time, diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e9481632fcd1..4e3684d694c1 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1308,6 +1308,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags, */ put_page(page); + /* device metadata space is not recoverable */ + if (!pgmap_pfn_valid(pgmap, pfn)) { + rc = -ENXIO; + goto out; + } + /* * Prevent the inode from being freed while we are interrogating * the address_space, typically this would be handled by diff --git a/mm/memory.c b/mm/memory.c index feff48e1465a..97e1d045f236 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2165,11 +2165,11 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned long pfn, pgprot_t prot) { - pte_t *pte; + pte_t *pte, *mapped_pte; spinlock_t *ptl; int err = 0; - pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); + mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl); if (!pte) return -ENOMEM; arch_enter_lazy_mmu_mode(); @@ -2183,7 +2183,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd, pfn++; } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); - pte_unmap_unlock(pte - 1, ptl); + pte_unmap_unlock(mapped_pte, ptl); return err; } @@ -3092,6 +3092,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf) return handle_userfault(vmf, VM_UFFD_WP); } + /* + * Userfaultfd write-protect can defer flushes. Ensure the TLB + * is flushed in this case before copying. + */ + if (unlikely(userfaultfd_wp(vmf->vma) && + mm_tlb_flush_pending(vmf->vma->vm_mm))) + flush_tlb_page(vmf->vma, vmf->address); + vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte); if (!vmf->page) { /* @@ -4709,9 +4717,9 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } #endif /* __PAGETABLE_PMD_FOLDED */ -int follow_pte(struct mm_struct *mm, unsigned long address, - struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, - spinlock_t **ptlp) +int follow_invalidate_pte(struct mm_struct *mm, unsigned long address, + struct mmu_notifier_range *range, pte_t **ptepp, + pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; @@ -4776,6 +4784,34 @@ int follow_pte(struct mm_struct *mm, unsigned long address, return -EINVAL; } +/** + * follow_pte - look up PTE at a user virtual address + * @mm: the mm_struct of the target address space + * @address: user virtual address + * @ptepp: location to store found PTE + * @ptlp: location to store the lock for the PTE + * + * On a successful return, the pointer to the PTE is stored in @ptepp; + * the corresponding lock is taken and its location is stored in @ptlp. + * The contents of the PTE are only stable until @ptlp is released; + * any further use, if any, must be protected against invalidation + * with MMU notifiers. + * + * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore + * should be taken for read. + * + * KVM uses this function. While it is arguably less bad than ``follow_pfn``, + * it is not a good general-purpose API. + * + * Return: zero on success, -ve otherwise. + */ +int follow_pte(struct mm_struct *mm, unsigned long address, + pte_t **ptepp, spinlock_t **ptlp) +{ + return follow_invalidate_pte(mm, address, NULL, ptepp, NULL, ptlp); +} +EXPORT_SYMBOL_GPL(follow_pte); + /** * follow_pfn - look up PFN at a user virtual address * @vma: memory mapping @@ -4784,6 +4820,9 @@ int follow_pte(struct mm_struct *mm, unsigned long address, * * Only IO mappings and raw PFN mappings are allowed. * + * This function does not allow the caller to read the permissions + * of the PTE. Do not use it. + * * Return: zero and the pfn at @pfn on success, -ve otherwise. */ int follow_pfn(struct vm_area_struct *vma, unsigned long address, @@ -4796,7 +4835,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) return ret; - ret = follow_pte(vma->vm_mm, address, NULL, &ptep, NULL, &ptl); + ret = follow_pte(vma->vm_mm, address, &ptep, &ptl); if (ret) return ret; *pfn = pte_pfn(*ptep); @@ -4817,7 +4856,7 @@ int follow_phys(struct vm_area_struct *vma, if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) goto out; - if (follow_pte(vma->vm_mm, address, NULL, &ptep, NULL, &ptl)) + if (follow_pte(vma->vm_mm, address, &ptep, &ptl)) goto out; pte = *ptep; @@ -5173,17 +5212,19 @@ long copy_huge_page_from_user(struct page *dst_page, void *page_kaddr; unsigned long i, rc = 0; unsigned long ret_val = pages_per_huge_page * PAGE_SIZE; + struct page *subpage = dst_page; - for (i = 0; i < pages_per_huge_page; i++) { + for (i = 0; i < pages_per_huge_page; + i++, subpage = mem_map_next(subpage, dst_page, i)) { if (allow_pagefault) - page_kaddr = kmap(dst_page + i); + page_kaddr = kmap(subpage); else - page_kaddr = kmap_atomic(dst_page + i); + page_kaddr = kmap_atomic(subpage); rc = copy_from_user(page_kaddr, (const void __user *)(src + i * PAGE_SIZE), PAGE_SIZE); if (allow_pagefault) - kunmap(dst_page + i); + kunmap(subpage); else kunmap_atomic(page_kaddr); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f9d57b9be8c7..fc16732d07f7 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1019,7 +1019,7 @@ static int online_memory_block(struct memory_block *mem, void *arg) */ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags) { - struct mhp_params params = { .pgprot = PAGE_KERNEL }; + struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) }; u64 start, size; bool new_node = false; int ret; diff --git a/mm/memremap.c b/mm/memremap.c index 16b2fb482da1..2455bac89506 100644 --- a/mm/memremap.c +++ b/mm/memremap.c @@ -80,6 +80,21 @@ static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id) return pfn + vmem_altmap_offset(pgmap_altmap(pgmap)); } +bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn) +{ + int i; + + for (i = 0; i < pgmap->nr_range; i++) { + struct range *range = &pgmap->ranges[i]; + + if (pfn >= PHYS_PFN(range->start) && + pfn <= PHYS_PFN(range->end)) + return pfn >= pfn_first(pgmap, i); + } + + return false; +} + static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id) { const struct range *range = &pgmap->ranges[range_id]; diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 61ee40ed804e..459d195d2ff6 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -501,10 +501,33 @@ static int mn_hlist_invalidate_range_start( ""); WARN_ON(mmu_notifier_range_blockable(range) || _ret != -EAGAIN); + /* + * We call all the notifiers on any EAGAIN, + * there is no way for a notifier to know if + * its start method failed, thus a start that + * does EAGAIN can't also do end. + */ + WARN_ON(ops->invalidate_range_end); ret = _ret; } } } + + if (ret) { + /* + * Must be non-blocking to get here. If there are multiple + * notifiers and one or more failed start, any that succeeded + * start are expecting their end to be called. Do so now. + */ + hlist_for_each_entry_rcu(subscription, &subscriptions->list, + hlist, srcu_read_lock_held(&srcu)) { + if (!subscription->ops->invalidate_range_end) + continue; + + subscription->ops->invalidate_range_end(subscription, + range); + } + } srcu_read_unlock(&srcu, id); return ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 519a60d5b6f7..a723e81a5da2 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1281,6 +1281,12 @@ static __always_inline bool free_pages_prepare(struct page *page, kernel_poison_pages(page, 1 << order); + /* + * With hardware tag-based KASAN, memory tags must be set before the + * page becomes unavailable via debug_pagealloc or arch_free_page. + */ + kasan_free_nondeferred_pages(page, order); + /* * arch_free_page() can make the page's contents inaccessible. s390 * does this. So nothing which can access the page's contents should @@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page, debug_pagealloc_unmap_pages(page, 1 << order); - kasan_free_nondeferred_pages(page, order); - return true; } @@ -3309,6 +3313,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, 1 << order); + split_page_memcg(page, 1 << order); } EXPORT_SYMBOL_GPL(split_page); @@ -6257,13 +6262,66 @@ static void __meminit zone_init_free_lists(struct zone *zone) } } +#if !defined(CONFIG_FLAT_NODE_MEM_MAP) +/* + * Only struct pages that correspond to ranges defined by memblock.memory + * are zeroed and initialized by going through __init_single_page() during + * memmap_init_zone(). + * + * But, there could be struct pages that correspond to holes in + * memblock.memory. This can happen because of the following reasons: + * - physical memory bank size is not necessarily the exact multiple of the + * arbitrary section size + * - early reserved memory may not be listed in memblock.memory + * - memory layouts defined with memmap= kernel parameter may not align + * nicely with memmap sections + * + * Explicitly initialize those struct pages so that: + * - PG_Reserved is set + * - zone and node links point to zone and node that span the page if the + * hole is in the middle of a zone + * - zone and node links point to adjacent zone/node if the hole falls on + * the zone boundary; the pages in such holes will be prepended to the + * zone/node above the hole except for the trailing pages in the last + * section that will be appended to the zone/node below. + */ +static u64 __meminit init_unavailable_range(unsigned long spfn, + unsigned long epfn, + int zone, int node) +{ + unsigned long pfn; + u64 pgcnt = 0; + + for (pfn = spfn; pfn < epfn; pfn++) { + if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { + pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) + + pageblock_nr_pages - 1; + continue; + } + __init_single_page(pfn_to_page(pfn), pfn, zone, node); + __SetPageReserved(pfn_to_page(pfn)); + pgcnt++; + } + + return pgcnt; +} +#else +static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn, + int zone, int node) +{ + return 0; +} +#endif + void __meminit __weak memmap_init(unsigned long size, int nid, unsigned long zone, unsigned long range_start_pfn) { + static unsigned long hole_pfn; unsigned long start_pfn, end_pfn; unsigned long range_end_pfn = range_start_pfn + size; int i; + u64 pgcnt = 0; for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) { start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn); @@ -6274,7 +6332,29 @@ void __meminit __weak memmap_init(unsigned long size, int nid, memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE); } + + if (hole_pfn < start_pfn) + pgcnt += init_unavailable_range(hole_pfn, start_pfn, + zone, nid); + hole_pfn = end_pfn; } + +#ifdef CONFIG_SPARSEMEM + /* + * Initialize the hole in the range [zone_end_pfn, section_end]. + * If zone boundary falls in the middle of a section, this hole + * will be re-initialized during the call to this function for the + * higher zone. + */ + end_pfn = round_up(range_end_pfn, PAGES_PER_SECTION); + if (hole_pfn < end_pfn) + pgcnt += init_unavailable_range(hole_pfn, end_pfn, + zone, nid); +#endif + + if (pgcnt) + pr_info(" %s zone: %llu pages in unavailable ranges\n", + zone_names[zone], pgcnt); } static int zone_batchsize(struct zone *zone) @@ -7075,88 +7155,6 @@ void __init free_area_init_memoryless_node(int nid) free_area_init_node(nid); } -#if !defined(CONFIG_FLAT_NODE_MEM_MAP) -/* - * Initialize all valid struct pages in the range [spfn, epfn) and mark them - * PageReserved(). Return the number of struct pages that were initialized. - */ -static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn) -{ - unsigned long pfn; - u64 pgcnt = 0; - - for (pfn = spfn; pfn < epfn; pfn++) { - if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) { - pfn = ALIGN_DOWN(pfn, pageblock_nr_pages) - + pageblock_nr_pages - 1; - continue; - } - /* - * Use a fake node/zone (0) for now. Some of these pages - * (in memblock.reserved but not in memblock.memory) will - * get re-initialized via reserve_bootmem_region() later. - */ - __init_single_page(pfn_to_page(pfn), pfn, 0, 0); - __SetPageReserved(pfn_to_page(pfn)); - pgcnt++; - } - - return pgcnt; -} - -/* - * Only struct pages that are backed by physical memory are zeroed and - * initialized by going through __init_single_page(). But, there are some - * struct pages which are reserved in memblock allocator and their fields - * may be accessed (for example page_to_pfn() on some configuration accesses - * flags). We must explicitly initialize those struct pages. - * - * This function also addresses a similar issue where struct pages are left - * uninitialized because the physical address range is not covered by - * memblock.memory or memblock.reserved. That could happen when memblock - * layout is manually configured via memmap=, or when the highest physical - * address (max_pfn) does not end on a section boundary. - */ -static void __init init_unavailable_mem(void) -{ - phys_addr_t start, end; - u64 i, pgcnt; - phys_addr_t next = 0; - - /* - * Loop through unavailable ranges not covered by memblock.memory. - */ - pgcnt = 0; - for_each_mem_range(i, &start, &end) { - if (next < start) - pgcnt += init_unavailable_range(PFN_DOWN(next), - PFN_UP(start)); - next = end; - } - - /* - * Early sections always have a fully populated memmap for the whole - * section - see pfn_valid(). If the last section has holes at the - * end and that section is marked "online", the memmap will be - * considered initialized. Make sure that memmap has a well defined - * state. - */ - pgcnt += init_unavailable_range(PFN_DOWN(next), - round_up(max_pfn, PAGES_PER_SECTION)); - - /* - * Struct pages that do not have backing memory. This could be because - * firmware is using some of this memory, or for some other reasons. - */ - if (pgcnt) - pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt); -} -#else -static inline void __init init_unavailable_mem(void) -{ -} -#endif /* !CONFIG_FLAT_NODE_MEM_MAP */ - #if MAX_NUMNODES > 1 /* * Figure out the number of possible node ids. @@ -7580,7 +7578,6 @@ void __init free_area_init(unsigned long *max_zone_pfn) /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); - init_unavailable_mem(); for_each_online_node(nid) { pg_data_t *pgdat = NODE_DATA(nid); free_area_init_node(nid); diff --git a/mm/page_io.c b/mm/page_io.c index 9bca17ecc4df..21f3160d39a8 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -273,11 +273,6 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) return ret; } -static sector_t swap_page_sector(struct page *page) -{ - return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9); -} - static inline void count_swpout_vm_event(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/mm/slab_common.c b/mm/slab_common.c index e981c80d216c..0b775cb5c108 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -837,8 +837,8 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) page = alloc_pages(flags, order); if (likely(page)) { ret = page_address(page); - mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, - PAGE_SIZE << order); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); } ret = kasan_kmalloc_large(ret, size, flags); /* As ret might get tagged, call kmemleak hook after KASAN. */ diff --git a/mm/slub.c b/mm/slub.c index b22a4b101c84..c86037b38253 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -1973,7 +1973,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n, t = acquire_slab(s, n, page, object == NULL, &objects); if (!t) - continue; /* cmpxchg raced */ + break; available += objects; if (!object) { @@ -3999,8 +3999,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) page = alloc_pages_node(node, flags, order); if (page) { ptr = page_address(page); - mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, - PAGE_SIZE << order); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + PAGE_SIZE << order); } return kmalloc_large_node_hook(ptr, size, flags); @@ -4131,8 +4131,8 @@ void kfree(const void *x) BUG_ON(!PageCompound(page)); kfree_hook(object); - mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B, - -(PAGE_SIZE << order)); + mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B, + -(PAGE_SIZE << order)); __free_pages(page, order); return; } diff --git a/mm/swapfile.c b/mm/swapfile.c index 9fffc5af29d1..348f6665c06c 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -220,6 +220,19 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset) BUG(); } +sector_t swap_page_sector(struct page *page) +{ + struct swap_info_struct *sis = page_swap_info(page); + struct swap_extent *se; + sector_t sector; + pgoff_t offset; + + offset = __page_file_index(page); + se = offset_to_swap_extent(sis, offset); + sector = se->start_block + (offset - se->start_page); + return sector << (PAGE_SHIFT - 9); +} + /* * swap allocation tell device that a cluster of swap can now be discarded, * to allow the swap device to optimize its wear-levelling. diff --git a/mm/vmscan.c b/mm/vmscan.c index b1b574ad199d..f9fcde30df3e 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -115,6 +115,14 @@ struct scan_control { /* There is easily reclaimable cold cache in the current node */ unsigned int cache_trim_mode:1; +#if defined(CONFIG_UNEVICTABLE_FILE) + /* The file pages on the current node are low */ + unsigned int file_is_low:1; + + /* The file pages on the current node are minimal */ + unsigned int file_is_min:1; +#endif + /* The file pages on the current node are dangerously low */ unsigned int file_is_tiny:1; @@ -164,6 +172,11 @@ struct scan_control { #define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0) #endif +#if defined(CONFIG_UNEVICTABLE_FILE) +extern unsigned long sysctl_unevictable_file_kbytes_low; +extern unsigned long sysctl_unevictable_file_kbytes_min; +#endif + /* * From 0 .. 200. Higher means more swappy. */ @@ -2427,6 +2440,15 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc, BUG(); } +#if defined(CONFIG_UNEVICTABLE_FILE) + if (file && scan) { + if (sc->file_is_low) + scan = min(scan, SWAP_CLUSTER_MAX >> sc->priority); + else if (sc->file_is_min) + scan = 0; + } +#endif + nr[lru] = scan; } } @@ -2673,6 +2695,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc) } while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL))); } +#if defined(CONFIG_UNEVICTABLE_FILE) +#define K(x) ((x) << (PAGE_SHIFT - 10)) +#endif + static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) { struct reclaim_state *reclaim_state = current->reclaim_state; @@ -2775,6 +2801,13 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc) file + free <= total_high_wmark && !(sc->may_deactivate & DEACTIVATE_ANON) && anon >> sc->priority; + +#if defined(CONFIG_UNEVICTABLE_FILE) + sc->file_is_low = K(file) < sysctl_unevictable_file_kbytes_low && + K(file) > sysctl_unevictable_file_kbytes_min; + + sc->file_is_min = K(file) <= sysctl_unevictable_file_kbytes_min; +#endif } shrink_node_memcgs(pgdat, sc); @@ -4095,8 +4128,13 @@ module_init(kswapd_init) */ int node_reclaim_mode __read_mostly; -#define RECLAIM_WRITE (1<<0) /* Writeout pages during reclaim */ -#define RECLAIM_UNMAP (1<<1) /* Unmap pages during reclaim */ +/* + * These bit locations are exposed in the vm.zone_reclaim_mode sysctl + * ABI. New bits are OK, but existing bits can never change. + */ +#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */ +#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */ +#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */ /* * Priority for NODE_RECLAIM. This determines the fraction of pages diff --git a/mm/z3fold.c b/mm/z3fold.c index dacb0d70fa61..809628d91c7b 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -1353,8 +1353,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) page = list_entry(pos, struct page, lru); zhdr = page_address(page); - if (test_bit(PAGE_HEADLESS, &page->private)) + if (test_bit(PAGE_HEADLESS, &page->private)) { + /* + * For non-headless pages, we wait to do this + * until we have the page lock to avoid racing + * with __z3fold_alloc(). Headless pages don't + * have a lock (and __z3fold_alloc() will never + * see them), but we still need to test and set + * PAGE_CLAIMED to avoid racing with + * z3fold_free(), so just do it now before + * leaving the loop. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + break; + } if (kref_get_unless_zero(&zhdr->refcount) == 0) { zhdr = NULL; @@ -1778,6 +1792,7 @@ static u64 z3fold_zpool_total_size(void *pool) static struct zpool_driver z3fold_zpool_driver = { .type = "z3fold", + .sleep_mapped = true, .owner = THIS_MODULE, .create = z3fold_zpool_create, .destroy = z3fold_zpool_destroy, diff --git a/mm/zbud.c b/mm/zbud.c index c49966ece674..7ec5f27a68b0 100644 --- a/mm/zbud.c +++ b/mm/zbud.c @@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *pool) static struct zpool_driver zbud_zpool_driver = { .type = "zbud", + .sleep_mapped = true, .owner = THIS_MODULE, .create = zbud_zpool_create, .destroy = zbud_zpool_destroy, diff --git a/mm/zpool.c b/mm/zpool.c index 3744a2d1a624..5ed71207ced7 100644 --- a/mm/zpool.c +++ b/mm/zpool.c @@ -23,6 +23,7 @@ struct zpool { void *pool; const struct zpool_ops *ops; bool evictable; + bool can_sleep_mapped; struct list_head list; }; @@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, zpool->pool = driver->create(name, gfp, ops, zpool); zpool->ops = ops; zpool->evictable = driver->shrink && ops && ops->evict; + zpool->can_sleep_mapped = driver->sleep_mapped; if (!zpool->pool) { pr_err("couldn't create %s pool\n", type); @@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool) return zpool->evictable; } +/** + * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped. + * @zpool: The zpool to test + * + * Returns: true if zpool can sleep; false otherwise. + */ +bool zpool_can_sleep_mapped(struct zpool *zpool) +{ + return zpool->can_sleep_mapped; +} + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dan Streetman "); MODULE_DESCRIPTION("Common API for compressed memory storage"); diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 7289f502ffac..052977d7936e 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -2213,11 +2213,13 @@ static unsigned long zs_can_compact(struct size_class *class) return obj_wasted * class->pages_per_zspage; } -static void __zs_compact(struct zs_pool *pool, struct size_class *class) +static unsigned long __zs_compact(struct zs_pool *pool, + struct size_class *class) { struct zs_compact_control cc; struct zspage *src_zspage; struct zspage *dst_zspage = NULL; + unsigned long pages_freed = 0; spin_lock(&class->lock); while ((src_zspage = isolate_zspage(class, true))) { @@ -2247,7 +2249,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class) putback_zspage(class, dst_zspage); if (putback_zspage(class, src_zspage) == ZS_EMPTY) { free_zspage(pool, class, src_zspage); - pool->stats.pages_compacted += class->pages_per_zspage; + pages_freed += class->pages_per_zspage; } spin_unlock(&class->lock); cond_resched(); @@ -2258,12 +2260,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class) putback_zspage(class, src_zspage); spin_unlock(&class->lock); + + return pages_freed; } unsigned long zs_compact(struct zs_pool *pool) { int i; struct size_class *class; + unsigned long pages_freed = 0; for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) { class = pool->size_class[i]; @@ -2271,10 +2276,11 @@ unsigned long zs_compact(struct zs_pool *pool) continue; if (class->index != i) continue; - __zs_compact(pool, class); + pages_freed += __zs_compact(pool, class); } + atomic_long_add(pages_freed, &pool->stats.pages_compacted); - return pool->stats.pages_compacted; + return pages_freed; } EXPORT_SYMBOL_GPL(zs_compact); @@ -2291,13 +2297,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker, struct zs_pool *pool = container_of(shrinker, struct zs_pool, shrinker); - pages_freed = pool->stats.pages_compacted; /* * Compact classes and calculate compaction delta. * Can run concurrently with a manually triggered * (by user) compaction. */ - pages_freed = zs_compact(pool) - pages_freed; + pages_freed = zs_compact(pool); return pages_freed ? pages_freed : SHRINK_STOP; } diff --git a/mm/zswap.c b/mm/zswap.c index 182f6ad5aa69..1566cc3ab7f4 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; - u8 *src; + u8 *src, *tmp = NULL; unsigned int dlen; int ret; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; + if (!zpool_can_sleep_mapped(pool)) { + tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC); + if (!tmp) + return -ENOMEM; + } + /* extract swpentry from data */ zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO); swpentry = zhdr->swpentry; /* here */ @@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) /* entry was invalidated */ spin_unlock(&tree->lock); zpool_unmap_handle(pool, handle); + kfree(tmp); return 0; } spin_unlock(&tree->lock); @@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) dlen = PAGE_SIZE; src = (u8 *)zhdr + sizeof(struct zswap_header); + if (!zpool_can_sleep_mapped(pool)) { + + memcpy(tmp, src, entry->length); + src = tmp; + + zpool_unmap_handle(pool, handle); + } + mutex_lock(acomp_ctx->mutex); sg_init_one(&input, src, entry->length); sg_init_table(&output, 1); @@ -1033,7 +1048,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle) spin_unlock(&tree->lock); end: - zpool_unmap_handle(pool, handle); + if (zpool_can_sleep_mapped(pool)) + zpool_unmap_handle(pool, handle); + else + kfree(tmp); + return ret; } @@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, struct zswap_entry *entry; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; - u8 *src, *dst; + u8 *src, *dst, *tmp; unsigned int dlen; int ret; @@ -1253,15 +1272,33 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, dst = kmap_atomic(page); zswap_fill_page(dst, entry->value); kunmap_atomic(dst); + ret = 0; goto freeentry; } + if (!zpool_can_sleep_mapped(entry->pool->zpool)) { + + tmp = kmalloc(entry->length, GFP_ATOMIC); + if (!tmp) { + ret = -ENOMEM; + goto freeentry; + } + } + /* decompress */ dlen = PAGE_SIZE; src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO); if (zpool_evictable(entry->pool->zpool)) src += sizeof(struct zswap_header); + if (!zpool_can_sleep_mapped(entry->pool->zpool)) { + + memcpy(tmp, src, entry->length); + src = tmp; + + zpool_unmap_handle(entry->pool->zpool, entry->handle); + } + acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); mutex_lock(acomp_ctx->mutex); sg_init_one(&input, src, entry->length); @@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait); mutex_unlock(acomp_ctx->mutex); - zpool_unmap_handle(entry->pool->zpool, entry->handle); + if (zpool_can_sleep_mapped(entry->pool->zpool)) + zpool_unmap_handle(entry->pool->zpool, entry->handle); + else + kfree(tmp); + BUG_ON(ret); freeentry: @@ -1279,7 +1320,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset, zswap_entry_put(tree, entry); spin_unlock(&tree->lock); - return 0; + return ret; } /* frees an entry in zswap */ diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index da7fd7c8c2dc..463bad58478b 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -381,9 +381,9 @@ static int a2mp_getampassoc_req(struct amp_mgr *mgr, struct sk_buff *skb, hdev = hci_dev_get(req->id); if (!hdev || hdev->amp_type == AMP_TYPE_BREDR || tmp) { struct a2mp_amp_assoc_rsp rsp; - rsp.id = req->id; memset(&rsp, 0, sizeof(rsp)); + rsp.id = req->id; if (tmp) { rsp.status = A2MP_STATUS_COLLISION_OCCURED; @@ -512,6 +512,7 @@ static int a2mp_createphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, assoc = kmemdup(req->amp_assoc, assoc_len, GFP_KERNEL); if (!assoc) { amp_ctrl_put(ctrl); + hci_dev_put(hdev); return -ENOMEM; } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index 9c711f0dfae3..be2d469d6369 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -297,6 +297,9 @@ void amp_read_loc_assoc_final_data(struct hci_dev *hdev, struct hci_request req; int err; + if (!mgr) + return; + cp.phy_handle = hcon->handle; cp.len_so_far = cpu_to_le16(0); cp.max_len = cpu_to_le16(hdev->amp_assoc_size); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 9d2c9a1c552f..6ea2e16c57bd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1362,8 +1362,10 @@ int hci_inquiry(void __user *arg) * cleared). If it is interrupted by a signal, return -EINTR. */ if (wait_on_bit(&hdev->flags, HCI_INQUIRY, - TASK_INTERRUPTIBLE)) - return -EINTR; + TASK_INTERRUPTIBLE)) { + err = -EINTR; + goto done; + } } /* for unlimited number of responses we will use buffer with @@ -3566,7 +3568,8 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, } /* Suspend notifier should only act on events when powered. */ - if (!hdev_is_powered(hdev)) + if (!hdev_is_powered(hdev) || + hci_dev_test_flag(hdev, HCI_UNREGISTER)) goto done; if (action == PM_SUSPEND_PREPARE) { @@ -3827,10 +3830,12 @@ int hci_register_dev(struct hci_dev *hdev) hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); - hdev->suspend_notifier.notifier_call = hci_suspend_notifier; - error = register_pm_notifier(&hdev->suspend_notifier); - if (error) - goto err_wqueue; + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hdev->suspend_notifier.notifier_call = hci_suspend_notifier; + error = register_pm_notifier(&hdev->suspend_notifier); + if (error) + goto err_wqueue; + } queue_work(hdev->req_workqueue, &hdev->power_on); @@ -3865,9 +3870,11 @@ void hci_unregister_dev(struct hci_dev *hdev) cancel_work_sync(&hdev->power_on); - hci_suspend_clear_tasks(hdev); - unregister_pm_notifier(&hdev->suspend_notifier); - cancel_work_sync(&hdev->suspend_prepare); + if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks)) { + hci_suspend_clear_tasks(hdev); + unregister_pm_notifier(&hdev->suspend_notifier); + cancel_work_sync(&hdev->suspend_prepare); + } hci_dev_do_close(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 71bffd745472..5aa7bd5030a2 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1087,6 +1087,8 @@ void hci_req_add_le_passive_scan(struct hci_request *req) if (hdev->suspended) { window = hdev->le_scan_window_suspend; interval = hdev->le_scan_int_suspend; + + set_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); } else if (hci_is_le_conn_scanning(hdev)) { window = hdev->le_scan_window_connect; interval = hdev->le_scan_int_connect; @@ -1170,19 +1172,6 @@ static void hci_req_set_event_filter(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } -static void hci_req_config_le_suspend_scan(struct hci_request *req) -{ - /* Before changing params disable scan if enabled */ - if (hci_dev_test_flag(req->hdev, HCI_LE_SCAN)) - hci_req_add_le_scan_disable(req, false); - - /* Configure params and enable scanning */ - hci_req_add_le_passive_scan(req); - - /* Block suspend notifier on response */ - set_bit(SUSPEND_SCAN_ENABLE, req->hdev->suspend_tasks); -} - static void cancel_adv_timeout(struct hci_dev *hdev) { if (hdev->adv_instance_timeout) { @@ -1245,8 +1234,10 @@ static void suspend_req_complete(struct hci_dev *hdev, u8 status, u16 opcode) { bt_dev_dbg(hdev, "Request complete opcode=0x%x, status=0x%x", opcode, status); - if (test_and_clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || - test_and_clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + if (test_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks) || + test_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks)) { + clear_bit(SUSPEND_SCAN_ENABLE, hdev->suspend_tasks); + clear_bit(SUSPEND_SCAN_DISABLE, hdev->suspend_tasks); wake_up(&hdev->suspend_wait_q); } } @@ -1336,7 +1327,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) /* Enable event filter for paired devices */ hci_req_set_event_filter(&req); /* Enable passive scan at lower duty cycle */ - hci_req_config_le_suspend_scan(&req); + __hci_update_background_scan(&req); /* Pause scan changes again. */ hdev->scanning_paused = true; hci_req_run(&req, suspend_req_complete); @@ -1346,7 +1337,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next) hci_req_clear_event_filter(&req); /* Reset passive/background scanning to normal */ - hci_req_config_le_suspend_scan(&req); + __hci_update_background_scan(&req); /* Unpause directed advertising */ hdev->advertising_paused = false; diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 015209bf44aa..3c42095fa75f 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -123,6 +123,8 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { if (!fdb->dst) return; + if (test_bit(BR_FDB_LOCAL, &fdb->flags)) + return; switch (type) { case RTM_DELNEIGH: diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 7a59cdddd3ce..5047e9c2333a 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -55,9 +55,8 @@ static BRPORT_ATTR(_name, 0644, \ static int store_flag(struct net_bridge_port *p, unsigned long v, unsigned long mask) { - unsigned long flags; - - flags = p->flags; + unsigned long flags = p->flags; + int err; if (v) flags |= mask; @@ -65,6 +64,10 @@ static int store_flag(struct net_bridge_port *p, unsigned long v, flags &= ~mask; if (flags != p->flags) { + err = br_switchdev_set_port_flag(p, flags, mask); + if (err) + return err; + p->flags = flags; br_port_flags_change(p, mask); } diff --git a/net/can/isotp.c b/net/can/isotp.c index 3ef7f78e553b..15ea1234d457 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -196,7 +196,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus) nskb->dev = dev; can_skb_set_owner(nskb, sk); ncf = (struct canfd_frame *)nskb->data; - skb_put(nskb, so->ll.mtu); + skb_put_zero(nskb, so->ll.mtu); /* create & send flow control reply */ ncf->can_id = so->txid; @@ -215,8 +215,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus) if (ae) ncf->data[0] = so->opt.ext_address; - if (so->ll.mtu == CANFD_MTU) - ncf->flags = so->ll.tx_flags; + ncf->flags = so->ll.tx_flags; can_send_ret = can_send(nskb, 1); if (can_send_ret) @@ -780,7 +779,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) can_skb_prv(skb)->skbcnt = 0; cf = (struct canfd_frame *)skb->data; - skb_put(skb, so->ll.mtu); + skb_put_zero(skb, so->ll.mtu); /* create consecutive frame */ isotp_fill_dataframe(cf, so, ae, 0); @@ -790,8 +789,7 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer) so->tx.sn %= 16; so->tx.bs++; - if (so->ll.mtu == CANFD_MTU) - cf->flags = so->ll.tx_flags; + cf->flags = so->ll.tx_flags; skb->dev = dev; can_skb_set_owner(skb, sk); @@ -897,7 +895,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) so->tx.idx = 0; cf = (struct canfd_frame *)skb->data; - skb_put(skb, so->ll.mtu); + skb_put_zero(skb, so->ll.mtu); /* check for single frame transmission depending on TX_DL */ if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) { @@ -939,8 +937,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } /* send the first or only CAN frame */ - if (so->ll.mtu == CANFD_MTU) - cf->flags = so->ll.tx_flags; + cf->flags = so->ll.tx_flags; skb->dev = dev; skb->sk = sk; @@ -1228,7 +1225,8 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, if (ll.mtu != CAN_MTU && ll.mtu != CANFD_MTU) return -EINVAL; - if (ll.mtu == CAN_MTU && ll.tx_dl > CAN_MAX_DLEN) + if (ll.mtu == CAN_MTU && + (ll.tx_dl > CAN_MAX_DLEN || ll.tx_flags != 0)) return -EINVAL; memcpy(&so->ll, &ll, sizeof(ll)); diff --git a/net/core/dev.c b/net/core/dev.c index 449b45b843d4..9e3be2ae8653 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1182,6 +1182,18 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) return -ENOMEM; for_each_netdev(net, d) { + struct netdev_name_node *name_node; + list_for_each_entry(name_node, &d->name_node->list, list) { + if (!sscanf(name_node->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, IFNAMSIZ, name, i); + if (!strncmp(buf, name_node->name, IFNAMSIZ)) + set_bit(i, inuse); + } if (!sscanf(d->name, name, &i)) continue; if (i < 0 || i >= max_netdevices) @@ -8743,6 +8755,48 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa, } EXPORT_SYMBOL(dev_set_mac_address); +static DECLARE_RWSEM(dev_addr_sem); + +int dev_set_mac_address_user(struct net_device *dev, struct sockaddr *sa, + struct netlink_ext_ack *extack) +{ + int ret; + + down_write(&dev_addr_sem); + ret = dev_set_mac_address(dev, sa, extack); + up_write(&dev_addr_sem); + return ret; +} +EXPORT_SYMBOL(dev_set_mac_address_user); + +int dev_get_mac_address(struct sockaddr *sa, struct net *net, char *dev_name) +{ + size_t size = sizeof(sa->sa_data); + struct net_device *dev; + int ret = 0; + + down_read(&dev_addr_sem); + rcu_read_lock(); + + dev = dev_get_by_name_rcu(net, dev_name); + if (!dev) { + ret = -ENODEV; + goto unlock; + } + if (!dev->addr_len) + memset(sa->sa_data, 0, size); + else + memcpy(sa->sa_data, dev->dev_addr, + min_t(size_t, size, dev->addr_len)); + sa->sa_family = dev->type; + +unlock: + rcu_read_unlock(); + up_read(&dev_addr_sem); + return ret; +} +EXPORT_SYMBOL(dev_get_mac_address); + /** * dev_change_carrier - Change device carrier * @dev: device @@ -11140,7 +11194,7 @@ static void __net_exit default_device_exit(struct net *net) continue; /* Leave virtual devices for the generic cleanup */ - if (dev->rtnl_link_ops) + if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund) continue; /* Push remaining network devices to init_net */ diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index db8a0ff86f36..478d032f34ac 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -123,17 +123,6 @@ static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cm ifr->ifr_mtu = dev->mtu; return 0; - case SIOCGIFHWADDR: - if (!dev->addr_len) - memset(ifr->ifr_hwaddr.sa_data, 0, - sizeof(ifr->ifr_hwaddr.sa_data)); - else - memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, - min(sizeof(ifr->ifr_hwaddr.sa_data), - (size_t)dev->addr_len)); - ifr->ifr_hwaddr.sa_family = dev->type; - return 0; - case SIOCGIFSLAVE: err = -EINVAL; break; @@ -274,7 +263,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) case SIOCSIFHWADDR: if (dev->addr_len > sizeof(struct sockaddr)) return -EINVAL; - return dev_set_mac_address(dev, &ifr->ifr_hwaddr, NULL); + return dev_set_mac_address_user(dev, &ifr->ifr_hwaddr, NULL); case SIOCSIFHWBROADCAST: if (ifr->ifr_hwaddr.sa_family != dev->type) @@ -418,6 +407,12 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c */ switch (cmd) { + case SIOCGIFHWADDR: + dev_load(net, ifr->ifr_name); + ret = dev_get_mac_address(&ifr->ifr_hwaddr, net, ifr->ifr_name); + if (colon) + *colon = ':'; + return ret; /* * These ioctl calls: * - can be done by all. @@ -427,7 +422,6 @@ int dev_ioctl(struct net *net, unsigned int cmd, struct ifreq *ifr, bool *need_c case SIOCGIFFLAGS: case SIOCGIFMETRIC: case SIOCGIFMTU: - case SIOCGIFHWADDR: case SIOCGIFSLAVE: case SIOCGIFMAP: case SIOCGIFINDEX: diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 571f191c06d9..db65ce62b625 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -1053,6 +1053,20 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) return 0; err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&hw_data->send_timer); + cancel_work_sync(&hw_data->dm_alert_work); + while ((skb = __skb_dequeue(&hw_data->drop_queue))) { + struct devlink_trap_metadata *hw_metadata; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + net_dm_hw_metadata_free(hw_metadata); + consume_skb(skb); + } + } module_put(THIS_MODULE); return rc; } @@ -1134,6 +1148,15 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack) err_unregister_trace: unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); err_module_put: + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + while ((skb = __skb_dequeue(&data->drop_queue))) + consume_skb(skb); + } module_put(THIS_MODULE); return rc; } diff --git a/net/core/dst.c b/net/core/dst.c index 0c01bd8d9d81..fb3bcba87744 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -237,37 +237,62 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old) } EXPORT_SYMBOL(__dst_destroy_metrics_generic); -static struct dst_ops md_dst_ops = { - .family = AF_UNSPEC, -}; +struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie) +{ + return NULL; +} -static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb) +u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old) { - WARN_ONCE(1, "Attempting to call output on metadata dst\n"); - kfree_skb(skb); - return 0; + return NULL; } -static int dst_md_discard(struct sk_buff *skb) +struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst, + struct sk_buff *skb, + const void *daddr) { - WARN_ONCE(1, "Attempting to call input on metadata dst\n"); - kfree_skb(skb); - return 0; + return NULL; +} + +void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) +{ +} +EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu); + +void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb) +{ +} +EXPORT_SYMBOL_GPL(dst_blackhole_redirect); + +unsigned int dst_blackhole_mtu(const struct dst_entry *dst) +{ + unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); + + return mtu ? : dst->dev->mtu; } +EXPORT_SYMBOL_GPL(dst_blackhole_mtu); + +static struct dst_ops dst_blackhole_ops = { + .family = AF_UNSPEC, + .neigh_lookup = dst_blackhole_neigh_lookup, + .check = dst_blackhole_check, + .cow_metrics = dst_blackhole_cow_metrics, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, +}; static void __metadata_dst_init(struct metadata_dst *md_dst, enum metadata_type type, u8 optslen) - { struct dst_entry *dst; dst = &md_dst->dst; - dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE, + dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, DST_METADATA | DST_NOCOUNT); - - dst->input = dst_md_discard; - dst->output = dst_md_discard_out; - memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst)); md_dst->type = type; } diff --git a/net/core/filter.c b/net/core/filter.c index 255aeee72402..ee665720a41a 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5552,6 +5552,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, { struct net *net = dev_net(skb->dev); int rc = -EAFNOSUPPORT; + bool check_mtu = false; if (plen < sizeof(*params)) return -EINVAL; @@ -5559,22 +5560,28 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb, if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT)) return -EINVAL; + if (params->tot_len) + check_mtu = true; + switch (params->family) { #if IS_ENABLED(CONFIG_INET) case AF_INET: - rc = bpf_ipv4_fib_lookup(net, params, flags, false); + rc = bpf_ipv4_fib_lookup(net, params, flags, check_mtu); break; #endif #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: - rc = bpf_ipv6_fib_lookup(net, params, flags, false); + rc = bpf_ipv6_fib_lookup(net, params, flags, check_mtu); break; #endif } - if (!rc) { + if (rc == BPF_FIB_LKUP_RET_SUCCESS && !check_mtu) { struct net_device *dev; + /* When tot_len isn't provided by user, check skb + * against MTU of FIB lookup resulting net_device + */ dev = dev_get_by_index_rcu(net, params->ifindex); if (!is_skb_forwardable(dev, skb)) rc = BPF_FIB_LKUP_RET_FRAG_NEEDED; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 6f1adba6695f..7a06d4301617 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -175,7 +175,7 @@ void skb_flow_get_icmp_tci(const struct sk_buff *skb, * avoid confusion with packets without such field */ if (icmp_has_id(ih->type)) - key_icmp->id = ih->un.echo.id ? : 1; + key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1; else key_icmp->id = 0; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 105978604ffd..3fba429f1f57 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3464,7 +3464,7 @@ static int pktgen_thread_worker(void *arg) struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; - BUG_ON(smp_processor_id() != cpu); + WARN_ON(smp_processor_id() != cpu); init_waitqueue_head(&t->queue); complete(&t->start_done); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 3d6ab194d0f5..f7c3885133dd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2660,7 +2660,7 @@ static int do_setlink(const struct sk_buff *skb, sa->sa_family = dev->type; memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), dev->addr_len); - err = dev_set_mac_address(dev, sa, extack); + err = dev_set_mac_address_user(dev, sa, extack); kfree(sa); if (err) goto errout; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 785daff48030..2b784d62a9fe 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3292,7 +3292,19 @@ EXPORT_SYMBOL(skb_split); */ static int skb_prepare_for_shift(struct sk_buff *skb) { - return skb_cloned(skb) && pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + int ret = 0; + + if (skb_cloned(skb)) { + /* Save and restore truesize: pskb_expand_head() may reallocate + * memory where ksize(kmalloc(S)) != ksize(kmalloc(S)), but we + * cannot change truesize at this point. + */ + unsigned int save_truesize = skb->truesize; + + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + skb->truesize = save_truesize; + } + return ret; } /** @@ -3610,6 +3622,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, struct ts_state state; unsigned int ret; + BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb)); + config->get_next_block = skb_ts_get_next_block; config->finish = skb_ts_finish; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1f73603913f5..2be5c69824f9 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -319,6 +319,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) return 0; /* discard, don't send a reset here */ + if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { + __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); + return 0; + } + if (dccp_bad_service_code(sk, service)) { dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; goto drop; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 38dcdded74c0..59748487664f 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -13,6 +13,7 @@ #define MTK_HDR_LEN 4 #define MTK_HDR_XMIT_UNTAGGED 0 #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 +#define MTK_HDR_XMIT_TAGGED_TPID_88A8 2 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) #define MTK_HDR_XMIT_SA_DIS BIT(6) @@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); + u8 xmit_tpid; u8 *mtk_tag; - bool is_vlan_skb = true; unsigned char *dest = eth_hdr(skb)->h_dest; bool is_multicast_skb = is_multicast_ether_addr(dest) && !is_broadcast_ether_addr(dest); @@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, * the both special and VLAN tag at the same time and then look up VLAN * table with VID. */ - if (!skb_vlan_tagged(skb)) { + switch (skb->protocol) { + case htons(ETH_P_8021Q): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100; + break; + case htons(ETH_P_8021AD): + xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8; + break; + default: + xmit_tpid = MTK_HDR_XMIT_UNTAGGED; skb_push(skb, MTK_HDR_LEN); memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN); - is_vlan_skb = false; } mtk_tag = skb->data + 2 * ETH_ALEN; @@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Mark tag attribute on special tag insertion to notify hardware * whether that's a combined special tag with 802.1Q header. */ - mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 : - MTK_HDR_XMIT_UNTAGGED; + mtk_tag[0] = xmit_tpid; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; /* Disable SA learning for multicast frames */ @@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; /* Tag control information is kept for 802.1Q */ - if (!is_vlan_skb) { + if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) { mtk_tag[2] = 0; mtk_tag[3] = 0; } diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 2646abe5a69e..e9176475bac8 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -12,9 +12,7 @@ * * The 2 bytes tag form a 16 bit big endian word. The exact * meaning has been guessed from packet dumps from ingress - * frames, as no working egress traffic has been available - * we do not know the format of the egress tags or if they - * are even supported. + * frames. */ #include @@ -36,17 +34,32 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - /* - * Just let it pass thru, we don't know if it is possible - * to tag a frame with the 0x8899 ethertype and direct it - * to a specific port, all attempts at reverse-engineering have - * ended up with the frames getting dropped. - * - * The VLAN set-up needs to restrict the frames to the right port. - * - * If you have documentation on the tagging format for RTL8366RB - * (tag type A) then please contribute. - */ + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *p; + u8 *tag; + u16 out; + + /* Pad out to at least 60 bytes */ + if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false))) + return NULL; + + netdev_dbg(dev, "add realtek tag to package to port %d\n", + dp->index); + skb_push(skb, RTL4_A_HDR_LEN); + + memmove(skb->data, skb->data + RTL4_A_HDR_LEN, 2 * ETH_ALEN); + tag = skb->data + 2 * ETH_ALEN; + + /* Set Ethertype */ + p = (__be16 *)tag; + *p = htons(RTL4_A_ETHERTYPE); + + out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8); + /* The lower bits is the port number */ + out |= (u8)dp->index; + p = (__be16 *)(tag + 2); + *p = htons(out); + return skb; } diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 25a9e566ef5c..6a070dc8e4b0 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) struct ethtool_channels channels = {}; struct ethnl_req_info req_info = {}; struct nlattr **tb = info->attrs; - const struct nlattr *err_attr; + u32 err_attr, max_rx_in_use = 0; const struct ethtool_ops *ops; struct net_device *dev; - u32 max_rx_in_use = 0; int ret; ret = ethnl_parse_header_dev_get(&req_info, @@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) /* ensure new channel counts are within limits */ if (channels.rx_count > channels.max_rx) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (channels.tx_count > channels.max_tx) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else if (channels.other_count > channels.max_other) - err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT; else if (channels.combined_count > channels.max_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; else - err_attr = NULL; + err_attr = 0; if (err_attr) { ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], "requested channel count exceeds maximum"); goto out_ops; } /* ensure there is at least one RX and one TX channel */ if (!channels.combined_count && !channels.rx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_RX_COUNT; else if (!channels.combined_count && !channels.tx_count) - err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_TX_COUNT; else - err_attr = NULL; + err_attr = 0; if (err_attr) { if (mod_combined) - err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT; ret = -EINVAL; - NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured"); + NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr], + "requested channel counts would result in no RX or TX channel being configured"); goto out_ops; } diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 5c97de459905..805f974923b9 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -164,8 +164,10 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr, * as initialization. (0 could trigger an spurious ring error warning). */ now = jiffies; - for (i = 0; i < HSR_PT_PORTS; i++) + for (i = 0; i < HSR_PT_PORTS; i++) { new_node->time_in[i] = now; + new_node->time_out[i] = now; + } for (i = 0; i < HSR_PT_PORTS; i++) new_node->seq_out[i] = seq_out; @@ -411,9 +413,12 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr) { - if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type])) + if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) && + time_is_after_jiffies(node->time_out[port->type] + + msecs_to_jiffies(HSR_ENTRY_FORGET_TIME))) return 1; + node->time_out[port->type] = jiffies; node->seq_out[port->type] = sequence_nr; return 0; } diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 86b43f539f2c..d9628e7a5f05 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -75,6 +75,7 @@ struct hsr_node { enum hsr_port_type addr_B_port; unsigned long time_in[HSR_PT_PORTS]; bool time_in_stale[HSR_PT_PORTS]; + unsigned long time_out[HSR_PT_PORTS]; /* if the node is a SAN */ bool san_a; bool san_b; diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index a9c30a608e35..80e976a8f28c 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -21,6 +21,7 @@ #define HSR_LIFE_CHECK_INTERVAL 2000 /* ms */ #define HSR_NODE_FORGET_TIME 60000 /* ms */ #define HSR_ANNOUNCE_INTERVAL 100 /* ms */ +#define HSR_ENTRY_FORGET_TIME 400 /* ms */ /* By how much may slave1 and slave2 timestamps of latest received frame from * each node differ before we notify of communication problem? diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 87983e70f03f..a833a7a67ce7 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -669,6 +669,24 @@ config TCP_CONG_BBR AQM schemes that do not provide a delay signal. It requires the fq ("Fair Queue") pacing packet scheduler. +config TCP_CONG_BBR2 + tristate "BBR2 TCP" + default n + help + + BBR2 TCP congestion control is a model-based congestion control + algorithm that aims to maximize network utilization, keep queues and + retransmit rates low, and to be able to coexist with Reno/CUBIC in + common scenarios. It builds an explicit model of the network path. It + tolerates a targeted degree of random packet loss and delay that are + unrelated to congestion. It can operate over LAN, WAN, cellular, wifi, + or cable modem links, and can use DCTCP-L4S-style ECN signals. It can + coexist with flows that use loss-based congestion control, and can + operate with shallow buffers, deep buffers, bufferbloat, policers, or + AQM schemes that do not provide a delay signal. It requires pacing, + using either TCP internal pacing or the fq ("Fair Queue") pacing packet + scheduler. + choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -706,6 +724,9 @@ choice config DEFAULT_BBR bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_BBR2 + bool "BBR2" if TCP_CONG_BBR2=y + config DEFAULT_RENO bool "Reno" endchoice @@ -730,6 +751,7 @@ config DEFAULT_TCP_CONG default "dctcp" if DEFAULT_DCTCP default "cdg" if DEFAULT_CDG default "bbr" if DEFAULT_BBR + default "bbr2" if DEFAULT_BBR2 default "cubic" config TCP_MD5SIG diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 5b77a46885b9..8c5779dba462 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_INET_RAW_DIAG) += raw_diag.o obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o +obj-$(CONFIG_TCP_CONG_BBR2) += tcp_bbr2.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index d520e61649c8..22129c1c56a2 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -16,7 +16,7 @@ static u32 optional_ops[] = { offsetof(struct tcp_congestion_ops, cwnd_event), offsetof(struct tcp_congestion_ops, in_ack_event), offsetof(struct tcp_congestion_ops, pkts_acked), - offsetof(struct tcp_congestion_ops, min_tso_segs), + offsetof(struct tcp_congestion_ops, tso_segs), offsetof(struct tcp_congestion_ops, sndbuf_expand), offsetof(struct tcp_congestion_ops, cong_control), }; diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 471d33a0d095..be09c7669a79 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&cipso_v4_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + cipso_v4_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&cipso_v4_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&cipso_v4_doi_list_lock); cipso_v4_cache_invalidate(); call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 396b492c804f..616e2dc1c8fa 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -775,13 +775,14 @@ EXPORT_SYMBOL(__icmp_send); void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct sk_buff *cloned_skb = NULL; + struct ip_options opts = { 0 }; enum ip_conntrack_info ctinfo; struct nf_conn *ct; __be32 orig_ip; ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(ct->status & IPS_SRC_NAT)) { - icmp_send(skb_in, type, code, info); + __icmp_send(skb_in, type, code, info, &opts); return; } @@ -796,7 +797,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info) orig_ip = ip_hdr(skb_in)->saddr; ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip; - icmp_send(skb_in, type, code, info); + __icmp_send(skb_in, type, code, info, &opts); ip_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6bd7ca09af03..fd472eae4f5c 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -705,12 +705,15 @@ static bool reqsk_queue_unlink(struct request_sock *req) return found; } -void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) { - if (reqsk_queue_unlink(req)) { + bool unlinked = reqsk_queue_unlink(req); + + if (unlinked) { reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); reqsk_put(req); } + return unlinked; } EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 76a420c76f16..f6cc26de5ed3 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (!skb_is_gso(skb) && (inner_iph->frag_off & htons(IP_DF)) && mtu < pkt_size) { - memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); return -E2BIG; } } @@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && mtu < pkt_size) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -E2BIG; } } diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index abc171e79d3e..eb207089ece0 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, if (skb->len > mtu) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IP)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } else { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } dst_release(dst); diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c576a63d09db..d1e04d2b5170 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = xt_table_get_private_protected(table); + struct xt_table_info *private = table->private; int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net, xt_compat_lock(NFPROTO_ARP); t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e8f6f9d86237..f15bc21d7301 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, xt_compat_lock(AF_INET); t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index e53e43aef785..bad4037d257b 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1364,7 +1364,7 @@ static int insert_nexthop(struct net *net, struct nexthop *new_nh, /* rtnl */ /* remove all nexthops tied to a device being deleted */ -static void nexthop_flush_dev(struct net_device *dev) +static void nexthop_flush_dev(struct net_device *dev, unsigned long event) { unsigned int hash = nh_dev_hashfn(dev->ifindex); struct net *net = dev_net(dev); @@ -1376,6 +1376,10 @@ static void nexthop_flush_dev(struct net_device *dev) if (nhi->fib_nhc.nhc_dev != dev) continue; + if (nhi->reject_nh && + (event == NETDEV_DOWN || event == NETDEV_CHANGE)) + continue; + remove_nexthop(net, nhi->nh_parent, NULL); } } @@ -2122,11 +2126,11 @@ static int nh_netdev_event(struct notifier_block *this, switch (event) { case NETDEV_DOWN: case NETDEV_UNREGISTER: - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGE: if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP))) - nexthop_flush_dev(dev); + nexthop_flush_dev(dev, event); break; case NETDEV_CHANGEMTU: info_ext = ptr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e26652ff7059..983b4db1868f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2682,44 +2682,15 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, return rth; } -static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie) -{ - return NULL; -} - -static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - -static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst, - unsigned long old) -{ - return NULL; -} - static struct dst_ops ipv4_dst_blackhole_ops = { - .family = AF_INET, - .check = ipv4_blackhole_dst_check, - .mtu = ipv4_blackhole_mtu, - .default_advmss = ipv4_default_advmss, - .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .redirect = ipv4_rt_blackhole_redirect, - .cow_metrics = ipv4_rt_blackhole_cow_metrics, - .neigh_lookup = ipv4_neigh_lookup, + .family = AF_INET, + .default_advmss = ipv4_default_advmss, + .neigh_lookup = ipv4_neigh_lookup, + .check = dst_blackhole_check, + .cow_metrics = dst_blackhole_cow_metrics, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 32545ecf2ab1..dff0bc8519e5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2995,6 +2995,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; tp->rcv_ooopack = 0; + tp->fast_ack_mode = 0; /* Clean up fastopen related fields */ @@ -3431,16 +3432,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname, break; case TCP_QUEUE_SEQ: - if (sk->sk_state != TCP_CLOSE) + if (sk->sk_state != TCP_CLOSE) { err = -EPERM; - else if (tp->repair_queue == TCP_SEND_QUEUE) - WRITE_ONCE(tp->write_seq, val); - else if (tp->repair_queue == TCP_RECV_QUEUE) { - WRITE_ONCE(tp->rcv_nxt, val); - WRITE_ONCE(tp->copied_seq, val); - } - else + } else if (tp->repair_queue == TCP_SEND_QUEUE) { + if (!tcp_rtx_queue_empty(sk)) + err = -EPERM; + else + WRITE_ONCE(tp->write_seq, val); + } else if (tp->repair_queue == TCP_RECV_QUEUE) { + if (tp->rcv_nxt != tp->copied_seq) { + err = -EPERM; + } else { + WRITE_ONCE(tp->rcv_nxt, val); + WRITE_ONCE(tp->copied_seq, val); + } + } else { err = -EINVAL; + } break; case TCP_REPAIR_OPTIONS: @@ -4088,7 +4096,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - if (len < offsetofend(struct tcp_zerocopy_receive, length)) + if (len < 0 || + len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; if (len > sizeof(zc)) { len = sizeof(zc); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 6ea3dc2e4219..8ef512fefe25 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -292,26 +292,40 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) sk->sk_pacing_rate = rate; } -/* override sysctl_tcp_min_tso_segs */ static u32 bbr_min_tso_segs(struct sock *sk) { return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; } +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + u32 segs; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ static u32 bbr_tso_segs_goal(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - u32 segs, bytes; - - /* Sort of tcp_tso_autosize() but ignoring - * driver provided sk_gso_max_size. - */ - bytes = min_t(unsigned long, - sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift), - GSO_MAX_SIZE - 1 - MAX_TCP_HEADER); - segs = max_t(u32, bytes / tp->mss_cache, bbr_min_tso_segs(sk)); - return min(segs, 0x7FU); + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); } /* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ @@ -1147,7 +1161,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .undo_cwnd = bbr_undo_cwnd, .cwnd_event = bbr_cwnd_event, .ssthresh = bbr_ssthresh, - .min_tso_segs = bbr_min_tso_segs, + .tso_segs = bbr_tso_segs, .get_info = bbr_get_info, .set_state = bbr_set_state, }; diff --git a/net/ipv4/tcp_bbr2.c b/net/ipv4/tcp_bbr2.c new file mode 100644 index 000000000000..5510adc92bbb --- /dev/null +++ b/net/ipv4/tcp_bbr2.c @@ -0,0 +1,2671 @@ +/* BBR (Bottleneck Bandwidth and RTT) congestion control, v2 + * + * BBRv2 is a model-based congestion control algorithm that aims for low + * queues, low loss, and (bounded) Reno/CUBIC coexistence. To maintain a model + * of the network path, it uses measurements of bandwidth and RTT, as well as + * (if they occur) packet loss and/or DCTCP/L4S-style ECN signals. Note that + * although it can use ECN or loss signals explicitly, it does not require + * either; it can bound its in-flight data based on its estimate of the BDP. + * + * The model has both higher and lower bounds for the operating range: + * lo: bw_lo, inflight_lo: conservative short-term lower bound + * hi: bw_hi, inflight_hi: robust long-term upper bound + * The bandwidth-probing time scale is (a) extended dynamically based on + * estimated BDP to improve coexistence with Reno/CUBIC; (b) bounded by + * an interactive wall-clock time-scale to be more scalable and responsive + * than Reno and CUBIC. + * + * Here is a state transition diagram for BBR: + * + * | + * V + * +---> STARTUP ----+ + * | | | + * | V | + * | DRAIN ----+ + * | | | + * | V | + * +---> PROBE_BW ----+ + * | ^ | | + * | | | | + * | +----+ | + * | | + * +---- PROBE_RTT <--+ + * + * A BBR flow starts in STARTUP, and ramps up its sending rate quickly. + * When it estimates the pipe is full, it enters DRAIN to drain the queue. + * In steady state a BBR flow only uses PROBE_BW and PROBE_RTT. + * A long-lived BBR flow spends the vast majority of its time remaining + * (repeatedly) in PROBE_BW, fully probing and utilizing the pipe's bandwidth + * in a fair manner, with a small, bounded queue. *If* a flow has been + * continuously sending for the entire min_rtt window, and hasn't seen an RTT + * sample that matches or decreases its min_rtt estimate for 10 seconds, then + * it briefly enters PROBE_RTT to cut inflight to a minimum value to re-probe + * the path's two-way propagation delay (min_rtt). When exiting PROBE_RTT, if + * we estimated that we reached the full bw of the pipe then we enter PROBE_BW; + * otherwise we enter STARTUP to try to fill the pipe. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR might be used with the fq qdisc ("man tc-fq") with pacing enabled, + * otherwise TCP stack falls back to an internal pacing using one high + * resolution timer per TCP socket and may use more resources. + */ +#include +#include +#include +#include +#include + +#include "tcp_dctcp.h" + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +#define FLAG_DEBUG_VERBOSE 0x1 /* Verbose debugging messages */ +#define FLAG_DEBUG_LOOPBACK 0x2 /* Do NOT skip loopback addr */ + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut inflight to min to probe min_rtt */ +}; + +/* How does the incoming ACK stream relate to our bandwidth probing? */ +enum bbr_ack_phase { + BBR_ACKS_INIT, /* not probing; not getting probe feedback */ + BBR_ACKS_REFILLING, /* sending at est. bw to fill pipe */ + BBR_ACKS_PROBE_STARTING, /* inflight rising to probe bw */ + BBR_ACKS_PROBE_FEEDBACK, /* getting feedback from bw probing */ + BBR_ACKS_PROBE_STOPPING, /* stopped probing; still getting feedback */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + u32 probe_rtt_min_us; /* min RTT in bbr_probe_rtt_win_ms window */ + u32 probe_rtt_min_stamp; /* timestamp of probe_rtt_min_us*/ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + u32 prior_rcv_nxt; /* tp->rcv_nxt when CE state last changed */ + u64 cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + round_start:1, /* start of packet-timed tx->ack round? */ + ce_state:1, /* If most recent data has CE bit set */ + bw_probe_up_rounds:5, /* cwnd-limited rounds in PROBE_UP */ + try_fast_path:1, /* can we take fast path? */ + unused2:11, + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + has_seen_rtt:1; /* have we seen an RTT sample yet? */ + u32 pacing_gain:11, /* current gain for setting pacing rate */ + cwnd_gain:11, /* current gain for setting cwnd */ + full_bw_reached:1, /* reached full bw in Startup? */ + full_bw_cnt:2, /* number of rounds without large bw gains */ + init_cwnd:7; /* initial cwnd */ + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ + + /* For tracking ACK aggregation: */ + u64 ack_epoch_mstamp; /* start of ACK sampling epoch */ + u16 extra_acked[2]; /* max excess data ACKed in epoch */ + u32 ack_epoch_acked:20, /* packets (S)ACKed in sampling epoch */ + extra_acked_win_rtts:5, /* age of extra_acked, in round trips */ + extra_acked_win_idx:1, /* current index in extra_acked array */ + /* BBR v2 state: */ + unused1:2, + startup_ecn_rounds:2, /* consecutive hi ECN STARTUP rounds */ + loss_in_cycle:1, /* packet loss in this cycle? */ + ecn_in_cycle:1; /* ECN in this cycle? */ + u32 loss_round_delivered; /* scb->tx.delivered ending loss round */ + u32 undo_bw_lo; /* bw_lo before latest losses */ + u32 undo_inflight_lo; /* inflight_lo before latest losses */ + u32 undo_inflight_hi; /* inflight_hi before latest losses */ + u32 bw_latest; /* max delivered bw in last round trip */ + u32 bw_lo; /* lower bound on sending bandwidth */ + u32 bw_hi[2]; /* upper bound of sending bandwidth range*/ + u32 inflight_latest; /* max delivered data in last round trip */ + u32 inflight_lo; /* lower bound of inflight data range */ + u32 inflight_hi; /* upper bound of inflight data range */ + u32 bw_probe_up_cnt; /* packets delivered per inflight_hi incr */ + u32 bw_probe_up_acks; /* packets (S)ACKed since inflight_hi incr */ + u32 probe_wait_us; /* PROBE_DOWN until next clock-driven probe */ + u32 ecn_eligible:1, /* sender can use ECN (RTT, handshake)? */ + ecn_alpha:9, /* EWMA delivered_ce/delivered; 0..256 */ + bw_probe_samples:1, /* rate samples reflect bw probing? */ + prev_probe_too_high:1, /* did last PROBE_UP go too high? */ + stopped_risky_probe:1, /* last PROBE_UP stopped due to risk? */ + rounds_since_probe:8, /* packet-timed rounds since probed bw */ + loss_round_start:1, /* loss_round_delivered round trip? */ + loss_in_round:1, /* loss marked in this round trip? */ + ecn_in_round:1, /* ECN marked in this round trip? */ + ack_phase:3, /* bbr_ack_phase: meaning of ACKs */ + loss_events_in_round:4,/* losses in STARTUP round */ + initialized:1; /* has bbr_init() been called? */ + u32 alpha_last_delivered; /* tp->delivered at alpha update */ + u32 alpha_last_delivered_ce; /* tp->delivered_ce at alpha update */ + + /* Params configurable using setsockopt. Refer to correspoding + * module param for detailed description of params. + */ + struct bbr_params { + u32 high_gain:11, /* max allowed value: 2047 */ + drain_gain:10, /* max allowed value: 1023 */ + cwnd_gain:11; /* max allowed value: 2047 */ + u32 cwnd_min_target:4, /* max allowed value: 15 */ + min_rtt_win_sec:5, /* max allowed value: 31 */ + probe_rtt_mode_ms:9, /* max allowed value: 511 */ + full_bw_cnt:3, /* max allowed value: 7 */ + cwnd_tso_budget:1, /* allowed values: {0, 1} */ + unused3:6, + drain_to_target:1, /* boolean */ + precise_ece_ack:1, /* boolean */ + extra_acked_in_startup:1, /* allowed values: {0, 1} */ + fast_path:1; /* boolean */ + u32 full_bw_thresh:10, /* max allowed value: 1023 */ + startup_cwnd_gain:11, /* max allowed value: 2047 */ + bw_probe_pif_gain:9, /* max allowed value: 511 */ + usage_based_cwnd:1, /* boolean */ + unused2:1; + u16 probe_rtt_win_ms:14, /* max allowed value: 16383 */ + refill_add_inc:2; /* max allowed value: 3 */ + u16 extra_acked_gain:11, /* max allowed value: 2047 */ + extra_acked_win_rtts:5; /* max allowed value: 31*/ + u16 pacing_gain[CYCLE_LEN]; /* max allowed value: 1023 */ + /* Mostly BBR v2 parameters below here: */ + u32 ecn_alpha_gain:8, /* max allowed value: 255 */ + ecn_factor:8, /* max allowed value: 255 */ + ecn_thresh:8, /* max allowed value: 255 */ + beta:8; /* max allowed value: 255 */ + u32 ecn_max_rtt_us:19, /* max allowed value: 524287 */ + bw_probe_reno_gain:9, /* max allowed value: 511 */ + full_loss_cnt:4; /* max allowed value: 15 */ + u32 probe_rtt_cwnd_gain:8, /* max allowed value: 255 */ + inflight_headroom:8, /* max allowed value: 255 */ + loss_thresh:8, /* max allowed value: 255 */ + bw_probe_max_rounds:8; /* max allowed value: 255 */ + u32 bw_probe_rand_rounds:4, /* max allowed value: 15 */ + bw_probe_base_us:26, /* usecs: 0..2^26-1 (67 secs) */ + full_ecn_cnt:2; /* max allowed value: 3 */ + u32 bw_probe_rand_us:26, /* usecs: 0..2^26-1 (67 secs) */ + undo:1, /* boolean */ + tso_rtt_shift:4, /* max allowed value: 15 */ + unused5:1; + u32 ecn_reprobe_gain:9, /* max allowed value: 511 */ + unused1:14, + ecn_alpha_init:9; /* max allowed value: 256 */ + } params; + + struct { + u32 snd_isn; /* Initial sequence number */ + u32 rs_bw; /* last valid rate sample bw */ + u32 target_cwnd; /* target cwnd, based on BDP */ + u8 undo:1, /* Undo even happened but not yet logged */ + unused:7; + char event; /* single-letter event debug codes */ + u16 unused2; + } debug; +}; + +struct bbr_context { + u32 sample_bw; + u32 target_cwnd; + u32 log:1; +}; + +/* Window length of min_rtt filter (in sec). Max allowed value is 31 (0x1F) */ +static u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode. + * Max allowed value is 511 (0x1FF). + */ +static u32 bbr_probe_rtt_mode_ms = 200; +/* Window length of probe_rtt_min_us filter (in ms), and consequently the + * typical interval between PROBE_RTT mode entries. + * Note that bbr_probe_rtt_win_ms must be <= bbr_min_rtt_win_sec * MSEC_PER_SEC + */ +static u32 bbr_probe_rtt_win_ms = 5000; +/* Skip TSO below the following bandwidth (bits/sec): */ +static int bbr_min_tso_rate = 1200000; + +/* Use min_rtt to help adapt TSO burst size, with smaller min_rtt resulting + * in bigger TSO bursts. By default we cut the RTT-based allowance in half + * for every 2^9 usec (aka 512 us) of RTT, so that the RTT-based allowance + * is below 1500 bytes after 6 * ~500 usec = 3ms. + */ +static u32 bbr_tso_rtt_shift = 9; /* halve allowance per 2^9 usecs, 512us */ + +/* Select cwnd TSO budget approach: + * 0: padding + * 1: flooring + */ +static uint bbr_cwnd_tso_budget = 1; + +/* Pace at ~1% below estimated bw, on average, to reduce queue at bottleneck. + * In order to help drive the network toward lower queues and low latency while + * maintaining high utilization, the average pacing rate aims to be slightly + * lower than the estimated bandwidth. This is an important aspect of the + * design. + */ +static const int bbr_pacing_margin_percent = 1; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would. Max allowed value is 2047 (0x7FF). + */ +static int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The gain for deriving startup cwnd. Max allowed value is 2047 (0x7FF). */ +static int bbr_startup_cwnd_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round. Max allowed value + * is 1023 (0x3FF). + */ +static int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs. + * Max allowed value is 2047 (0x7FF). + */ +static int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw. + * Max allowed value for each element is 1023 (0x3FF). + */ +enum bbr_pacing_gain_phase { + BBR_BW_PROBE_UP = 0, /* push up inflight to probe for bw/vol */ + BBR_BW_PROBE_DOWN = 1, /* drain excess inflight from the queue */ + BBR_BW_PROBE_CRUISE = 2, /* use pipe, w/ headroom in queue/pipe */ + BBR_BW_PROBE_REFILL = 3, /* v2: refill the pipe again to 100% */ +}; +static int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight. Max allowed value is 15 (0xF). + */ +static u32 bbr_cwnd_min_target = 4; + +/* Cwnd to BDP proportion in PROBE_RTT mode scaled by BBR_UNIT. Default: 50%. + * Use 0 to disable. Max allowed value is 255. + */ +static u32 bbr_probe_rtt_cwnd_gain = BBR_UNIT * 1 / 2; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available. + * Max allowed value is 1023 (0x3FF). + */ +static u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full. + * Max allowed value is 7 (0x7). + */ +static u32 bbr_full_bw_cnt = 3; + +static u32 bbr_flags; /* Debugging related stuff */ + +/* Whether to debug using printk. + */ +static bool bbr_debug_with_printk; + +/* Whether to debug using ftrace event tcp:tcp_bbr_event. + * Ignored when bbr_debug_with_printk is set. + */ +static bool bbr_debug_ftrace; + +/* Experiment: each cycle, try to hold sub-unity gain until inflight <= BDP. */ +static bool bbr_drain_to_target = true; /* default: enabled */ + +/* Experiment: Flags to control BBR with ECN behavior. + */ +static bool bbr_precise_ece_ack = true; /* default: enabled */ + +/* The max rwin scaling shift factor is 14 (RFC 1323), so the max sane rwin is + * (2^(16+14) B)/(1024 B/packet) = 1M packets. + */ +static u32 bbr_cwnd_warn_val = 1U << 20; + +static u16 bbr_debug_port_mask; + +/* BBR module parameters. These are module parameters only in Google prod. + * Upstream these are intentionally not module parameters. + */ +static int bbr_pacing_gain_size = CYCLE_LEN; + +/* Gain factor for adding extra_acked to target cwnd: */ +static int bbr_extra_acked_gain = 256; + +/* Window length of extra_acked window. Max allowed val is 31. */ +static u32 bbr_extra_acked_win_rtts = 5; + +/* Max allowed val for ack_epoch_acked, after which sampling epoch is reset */ +static u32 bbr_ack_epoch_acked_reset_thresh = 1U << 20; + +/* Time period for clamping cwnd increment due to ack aggregation */ +static u32 bbr_extra_acked_max_us = 100 * 1000; + +/* Use extra acked in startup ? + * 0: disabled + * 1: use latest extra_acked value from 1-2 rtt in startup + */ +static int bbr_extra_acked_in_startup = 1; /* default: enabled */ + +/* Experiment: don't grow cwnd beyond twice of what we just probed. */ +static bool bbr_usage_based_cwnd; /* default: disabled */ + +/* For lab testing, researchers can enable BBRv2 ECN support with this flag, + * when they know that any ECN marks that the connections experience will be + * DCTCP/L4S-style ECN marks, rather than RFC3168 ECN marks. + * TODO(ncardwell): Production use of the BBRv2 ECN functionality depends on + * negotiation or configuration that is outside the scope of the BBRv2 + * alpha release. + */ +static bool bbr_ecn_enable = false; + +module_param_named(min_tso_rate, bbr_min_tso_rate, int, 0644); +module_param_named(tso_rtt_shift, bbr_tso_rtt_shift, int, 0644); +module_param_named(high_gain, bbr_high_gain, int, 0644); +module_param_named(drain_gain, bbr_drain_gain, int, 0644); +module_param_named(startup_cwnd_gain, bbr_startup_cwnd_gain, int, 0644); +module_param_named(cwnd_gain, bbr_cwnd_gain, int, 0644); +module_param_array_named(pacing_gain, bbr_pacing_gain, int, + &bbr_pacing_gain_size, 0644); +module_param_named(cwnd_min_target, bbr_cwnd_min_target, uint, 0644); +module_param_named(probe_rtt_cwnd_gain, + bbr_probe_rtt_cwnd_gain, uint, 0664); +module_param_named(cwnd_warn_val, bbr_cwnd_warn_val, uint, 0664); +module_param_named(debug_port_mask, bbr_debug_port_mask, ushort, 0644); +module_param_named(flags, bbr_flags, uint, 0644); +module_param_named(debug_ftrace, bbr_debug_ftrace, bool, 0644); +module_param_named(debug_with_printk, bbr_debug_with_printk, bool, 0644); +module_param_named(min_rtt_win_sec, bbr_min_rtt_win_sec, uint, 0644); +module_param_named(probe_rtt_mode_ms, bbr_probe_rtt_mode_ms, uint, 0644); +module_param_named(probe_rtt_win_ms, bbr_probe_rtt_win_ms, uint, 0644); +module_param_named(full_bw_thresh, bbr_full_bw_thresh, uint, 0644); +module_param_named(full_bw_cnt, bbr_full_bw_cnt, uint, 0644); +module_param_named(cwnd_tso_bduget, bbr_cwnd_tso_budget, uint, 0664); +module_param_named(extra_acked_gain, bbr_extra_acked_gain, int, 0664); +module_param_named(extra_acked_win_rtts, + bbr_extra_acked_win_rtts, uint, 0664); +module_param_named(extra_acked_max_us, + bbr_extra_acked_max_us, uint, 0664); +module_param_named(ack_epoch_acked_reset_thresh, + bbr_ack_epoch_acked_reset_thresh, uint, 0664); +module_param_named(drain_to_target, bbr_drain_to_target, bool, 0664); +module_param_named(precise_ece_ack, bbr_precise_ece_ack, bool, 0664); +module_param_named(extra_acked_in_startup, + bbr_extra_acked_in_startup, int, 0664); +module_param_named(usage_based_cwnd, bbr_usage_based_cwnd, bool, 0664); +module_param_named(ecn_enable, bbr_ecn_enable, bool, 0664); + +static void bbr2_exit_probe_rtt(struct sock *sk); +static void bbr2_reset_congestion_signals(struct sock *sk); + +static void bbr_check_probe_rtt_done(struct sock *sk); + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_reached; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->bw_hi[0], bbr->bw_hi[1]); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return min(bbr_max_bw(sk), bbr->bw_lo); +} + +/* Return maximum extra acked in past k-2k round trips, + * where k = bbr_extra_acked_win_rtts. + */ +static u16 bbr_extra_acked(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return max(bbr->extra_acked[0], bbr->extra_acked[1]); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. + */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain, + int margin) +{ + unsigned int mss = tcp_sk(sk)->mss_cache; + + rate *= mss; + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC / 100 * (100 - margin); + rate >>= BW_SCALE; + rate = max(rate, 1ULL); + return rate; +} + +static u64 bbr_bw_bytes_per_sec(struct sock *sk, u64 rate) +{ + return bbr_rate_bytes_per_sec(sk, rate, BBR_UNIT, 0); +} + +static u64 bbr_rate_kbps(struct sock *sk, u64 rate) +{ + rate = bbr_bw_bytes_per_sec(sk, rate); + rate *= 8; + do_div(rate, 1000); + return rate; +} + +static u32 bbr_tso_segs_goal(struct sock *sk); +static void bbr_debug(struct sock *sk, u32 acked, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + static const char ca_states[] = { + [TCP_CA_Open] = 'O', + [TCP_CA_Disorder] = 'D', + [TCP_CA_CWR] = 'C', + [TCP_CA_Recovery] = 'R', + [TCP_CA_Loss] = 'L', + }; + static const char mode[] = { + 'G', /* Growing - BBR_STARTUP */ + 'D', /* Drain - BBR_DRAIN */ + 'W', /* Window - BBR_PROBE_BW */ + 'M', /* Min RTT - BBR_PROBE_RTT */ + }; + static const char ack_phase[] = { /* bbr_ack_phase strings */ + 'I', /* BBR_ACKS_INIT - 'Init' */ + 'R', /* BBR_ACKS_REFILLING - 'Refilling' */ + 'B', /* BBR_ACKS_PROBE_STARTING - 'Before' */ + 'F', /* BBR_ACKS_PROBE_FEEDBACK - 'Feedback' */ + 'A', /* BBR_ACKS_PROBE_STOPPING - 'After' */ + }; + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + const u32 una = tp->snd_una - bbr->debug.snd_isn; + const u32 fack = tcp_highest_sack_seq(tp); + const u16 dport = ntohs(inet_sk(sk)->inet_dport); + bool is_port_match = (bbr_debug_port_mask && + ((dport & bbr_debug_port_mask) == 0)); + char debugmsg[320]; + + if (sk->sk_state == TCP_SYN_SENT) + return; /* no bbr_init() yet if SYN retransmit -> CA_Loss */ + + if (!tp->snd_cwnd || tp->snd_cwnd > bbr_cwnd_warn_val) { + char addr[INET6_ADDRSTRLEN + 10] = { 0 }; + + if (sk->sk_family == AF_INET) + snprintf(addr, sizeof(addr), "%pI4:%u", + &inet_sk(sk)->inet_daddr, dport); + else if (sk->sk_family == AF_INET6) + snprintf(addr, sizeof(addr), "%pI6:%u", + &sk->sk_v6_daddr, dport); + + WARN_ONCE(1, + "BBR %s cwnd alert: %u " + "snd_una: %u ca: %d pacing_gain: %u cwnd_gain: %u " + "bw: %u rtt: %u min_rtt: %u " + "acked: %u tso_segs: %u " + "bw: %d %ld %d pif: %u\n", + addr, tp->snd_cwnd, + una, inet_csk(sk)->icsk_ca_state, + bbr->pacing_gain, bbr->cwnd_gain, + bbr_max_bw(sk), (tp->srtt_us >> 3), bbr->min_rtt_us, + acked, bbr_tso_segs_goal(sk), + rs->delivered, rs->interval_us, rs->is_retrans, + tcp_packets_in_flight(tp)); + } + + if (likely(!bbr_debug_with_printk && !bbr_debug_ftrace)) + return; + + if (!sock_flag(sk, SOCK_DBG) && !is_port_match) + return; + + if (!ctx->log && !tp->app_limited && !(bbr_flags & FLAG_DEBUG_VERBOSE)) + return; + + if (ipv4_is_loopback(inet_sk(sk)->inet_daddr) && + !(bbr_flags & FLAG_DEBUG_LOOPBACK)) + return; + + snprintf(debugmsg, sizeof(debugmsg) - 1, + "BBR %pI4:%-5u %5u,%03u:%-7u %c " + "%c %2u br %2u cr %2d rtt %5ld d %2d i %5ld mrtt %d %cbw %llu " + "bw %llu lb %llu ib %llu qb %llu " + "a %u if %2u %c %c dl %u l %u al %u # %u t %u %c %c " + "lr %d er %d ea %d bwl %lld il %d ih %d c %d " + "v %d %c %u %c %s\n", + &inet_sk(sk)->inet_daddr, dport, + una / 1000, una % 1000, fack - tp->snd_una, + ca_states[inet_csk(sk)->icsk_ca_state], + bbr->debug.undo ? '@' : mode[bbr->mode], + tp->snd_cwnd, + bbr_extra_acked(sk), /* br (legacy): extra_acked */ + rs->tx_in_flight, /* cr (legacy): tx_inflight */ + rs->rtt_us, + rs->delivered, + rs->interval_us, + bbr->min_rtt_us, + rs->is_app_limited ? '_' : 'l', + bbr_rate_kbps(sk, ctx->sample_bw), /* lbw: latest sample bw */ + bbr_rate_kbps(sk, bbr_max_bw(sk)), /* bw: max bw */ + 0ULL, /* lb: [obsolete] */ + 0ULL, /* ib: [obsolete] */ + (u64)sk->sk_pacing_rate * 8 / 1000, + acked, + tcp_packets_in_flight(tp), + rs->is_ack_delayed ? 'd' : '.', + bbr->round_start ? '*' : '.', + tp->delivered, tp->lost, + tp->app_limited, + 0, /* #: [obsolete] */ + ctx->target_cwnd, + tp->reord_seen ? 'r' : '.', /* r: reordering seen? */ + ca_states[bbr->prev_ca_state], + (rs->lost + rs->delivered) > 0 ? + (1000 * rs->lost / + (rs->lost + rs->delivered)) : 0, /* lr: loss rate x1000 */ + (rs->delivered) > 0 ? + (1000 * rs->delivered_ce / + (rs->delivered)) : 0, /* er: ECN rate x1000 */ + 1000 * bbr->ecn_alpha >> BBR_SCALE, /* ea: ECN alpha x1000 */ + bbr->bw_lo == ~0U ? + -1 : (s64)bbr_rate_kbps(sk, bbr->bw_lo), /* bwl */ + bbr->inflight_lo, /* il */ + bbr->inflight_hi, /* ih */ + bbr->bw_probe_up_cnt, /* c */ + 2, /* v: version */ + bbr->debug.event, + bbr->cycle_idx, + ack_phase[bbr->ack_phase], + bbr->bw_probe_samples ? "Y" : "N"); + debugmsg[sizeof(debugmsg) - 1] = 0; + + /* printk takes a higher precedence. */ + if (bbr_debug_with_printk) + printk(KERN_DEBUG "%s", debugmsg); + + if (unlikely(bbr->debug.undo)) + bbr->debug.undo = 0; +} + +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static unsigned long bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain, + bbr_pacing_margin_percent); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr->params.high_gain); +} + +/* Pace using current bw estimate and a gain factor. */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + unsigned long rate = bbr_bw_to_pacing_rate(sk, bw, gain); + + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +static u32 bbr_min_tso_segs(struct sock *sk) +{ + return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; +} + +/* Return the number of segments BBR would like in a TSO/GSO skb, given + * a particular max gso size as a constraint. + */ +static u32 bbr_tso_segs_generic(struct sock *sk, unsigned int mss_now, + u32 gso_max_size) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 segs, r; + u64 bytes; + + /* Budget a TSO/GSO burst size allowance based on bw (pacing_rate). */ + bytes = sk->sk_pacing_rate >> sk->sk_pacing_shift; + + /* Budget a TSO/GSO burst size allowance based on min_rtt. For every + * K = 2^tso_rtt_shift microseconds of min_rtt, halve the burst. + * The min_rtt-based burst allowance is: 64 KBytes / 2^(min_rtt/K) + */ + if (bbr->params.tso_rtt_shift) { + r = bbr->min_rtt_us >> bbr->params.tso_rtt_shift; + if (r < BITS_PER_TYPE(u32)) /* prevent undefined behavior */ + bytes += GSO_MAX_SIZE >> r; + } + + bytes = min_t(u32, bytes, gso_max_size - 1 - MAX_TCP_HEADER); + segs = max_t(u32, bytes / mss_now, bbr_min_tso_segs(sk)); + return segs; +} + +/* Custom tcp_tso_autosize() for BBR, used at transmit time to cap skb size. */ +static u32 bbr_tso_segs(struct sock *sk, unsigned int mss_now) +{ + return bbr_tso_segs_generic(sk, mss_now, sk->sk_gso_max_size); +} + +/* Like bbr_tso_segs(), using mss_cache, ignoring driver's sk_gso_max_size. */ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return bbr_tso_segs_generic(sk, tp->mss_cache, GSO_MAX_SIZE); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + else if (bbr->mode == BBR_PROBE_RTT) + bbr_check_probe_rtt_done(sk); + } else if ((event == CA_EVENT_ECN_IS_CE || + event == CA_EVENT_ECN_NO_CE) && + bbr_ecn_enable && + bbr->params.precise_ece_ack) { + u32 state = bbr->ce_state; + dctcp_ece_ack_update(sk, event, &bbr->prior_rcv_nxt, &state); + bbr->ce_state = state; + if (tp->fast_ack_mode == 2 && event == CA_EVENT_ECN_IS_CE) + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); + } +} + +/* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: + * + * bdp = ceil(bw * min_rtt * gain) + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + */ +static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bdp; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: initial cwnd. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return bbr->init_cwnd; /* be safe: cap at initial cwnd */ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. + */ + bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + return bdp; +} + +/* To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_quantization_budget(struct sock *sk, u32 cwnd) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 tso_segs_goal; + + tso_segs_goal = 3 * bbr_tso_segs_goal(sk); + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + if (bbr->params.cwnd_tso_budget == 1) { + cwnd = max_t(u32, cwnd, tso_segs_goal); + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); + } else { + cwnd += tso_segs_goal; + cwnd = (cwnd + 1) & ~1U; + } + /* Ensure gain cycling gets inflight above BDP even for small BDPs. */ + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + cwnd += 2; + + return cwnd; +} + +/* Find inflight based on min RTT and the estimated bottleneck bandwidth. */ +static u32 bbr_inflight(struct sock *sk, u32 bw, int gain) +{ + u32 inflight; + + inflight = bbr_bdp(sk, bw, gain); + inflight = bbr_quantization_budget(sk, inflight); + + return inflight; +} + +/* With pacing at lower layers, there's often less data "in the network" than + * "in flight". With TSQ and departure time pacing at lower layers (e.g. fq), + * we often have several skbs queued in the pacing layer with a pre-scheduled + * earliest departure time (EDT). BBR adapts its pacing rate based on the + * inflight level that it estimates has already been "baked in" by previous + * departure time decisions. We calculate a rough estimate of the number of our + * packets that might be in the network at the earliest departure time for the + * next skb scheduled: + * in_network_at_edt = inflight_at_edt - (EDT - now) * bw + * If we're increasing inflight, then we want to know if the transmit of the + * EDT skb will push inflight above the target, so inflight_at_edt includes + * bbr_tso_segs_goal() from the skb departing at EDT. If decreasing inflight, + * then estimate if inflight will sink too low just before the EDT transmit. + */ +static u32 bbr_packets_in_net_at_edt(struct sock *sk, u32 inflight_now) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 now_ns, edt_ns, interval_us; + u32 interval_delivered, inflight_at_edt; + + now_ns = tp->tcp_clock_cache; + edt_ns = max(tp->tcp_wstamp_ns, now_ns); + interval_us = div_u64(edt_ns - now_ns, NSEC_PER_USEC); + interval_delivered = (u64)bbr_bw(sk) * interval_us >> BW_SCALE; + inflight_at_edt = inflight_now; + if (bbr->pacing_gain > BBR_UNIT) /* increasing inflight */ + inflight_at_edt += bbr_tso_segs_goal(sk); /* include EDT skb */ + if (interval_delivered >= inflight_at_edt) + return 0; + return inflight_at_edt - interval_delivered; +} + +/* Find the cwnd increment based on estimate of ack aggregation */ +static u32 bbr_ack_aggregation_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 max_aggr_cwnd, aggr_cwnd = 0; + + if (bbr->params.extra_acked_gain && + (bbr_full_bw_reached(sk) || bbr->params.extra_acked_in_startup)) { + max_aggr_cwnd = ((u64)bbr_bw(sk) * bbr_extra_acked_max_us) + / BW_UNIT; + aggr_cwnd = (bbr->params.extra_acked_gain * bbr_extra_acked(sk)) + >> BBR_SCALE; + aggr_cwnd = min(aggr_cwnd, max_aggr_cwnd); + } + + return aggr_cwnd; +} + +/* Returns the cwnd for PROBE_RTT mode. */ +static u32 bbr_probe_rtt_cwnd(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->params.probe_rtt_cwnd_gain == 0) + return bbr->params.cwnd_min_target; + return max_t(u32, bbr->params.cwnd_min_target, + bbr_bdp(sk, bbr_bw(sk), bbr->params.probe_rtt_cwnd_gain)); +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. + */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain, u32 cwnd, + struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 target_cwnd = 0, prev_cwnd = tp->snd_cwnd, max_probe; + + if (!acked) + goto done; /* no packet fully ACKed; just apply caps */ + + target_cwnd = bbr_bdp(sk, bw, gain); + + /* Increment the cwnd to account for excess ACKed data that seems + * due to aggregation (of data and/or ACKs) visible in the ACK stream. + */ + target_cwnd += bbr_ack_aggregation_cwnd(sk); + target_cwnd = bbr_quantization_budget(sk, target_cwnd); + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + bbr->debug.target_cwnd = target_cwnd; + + /* Update cwnd and enable fast path if cwnd reaches target_cwnd. */ + bbr->try_fast_path = 0; + if (bbr_full_bw_reached(sk)) { /* only cut cwnd if we filled the pipe */ + cwnd += acked; + if (cwnd >= target_cwnd) { + cwnd = target_cwnd; + bbr->try_fast_path = 1; + } + } else if (cwnd < target_cwnd || cwnd < 2 * bbr->init_cwnd) { + cwnd += acked; + } else { + bbr->try_fast_path = 1; + } + + /* When growing cwnd, don't grow beyond twice what we just probed. */ + if (bbr->params.usage_based_cwnd) { + max_probe = max(2 * tp->max_packets_out, tp->snd_cwnd); + cwnd = min(cwnd, max_probe); + } + + cwnd = max_t(u32, cwnd, bbr->params.cwnd_min_target); +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min_t(u32, tp->snd_cwnd, bbr_probe_rtt_cwnd(sk)); + + ctx->target_cwnd = target_cwnd; + ctx->log = (tp->snd_cwnd != prev_cwnd); +} + +/* See if we have reached next round trip */ +static void bbr_update_round_start(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->round_start = 0; + + /* See if we've reached the next RTT */ + if (rs->interval_us > 0 && + !before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->round_start = 1; + } +} + +/* Calculate the bandwidth based on how fast packets are delivered */ +static void bbr_calculate_bw_sample(struct sock *sk, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = 0; + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + * Round up to allow growth at low rates, even with integer division. + */ + if (rs->interval_us > 0) { + if (WARN_ONCE(rs->delivered < 0, + "negative delivered: %d interval_us: %ld\n", + rs->delivered, rs->interval_us)) + return; + + bw = DIV_ROUND_UP_ULL((u64)rs->delivered * BW_UNIT, rs->interval_us); + } + + ctx->sample_bw = bw; + bbr->debug.rs_bw = bw; +} + +/* Estimates the windowed max degree of ack aggregation. + * This is used to provision extra in-flight data to keep sending during + * inter-ACK silences. + * + * Degree of ack aggregation is estimated as extra data acked beyond expected. + * + * max_extra_acked = "maximum recent excess data ACKed beyond max_bw * interval" + * cwnd += max_extra_acked + * + * Max extra_acked is clamped by cwnd and bw * bbr_extra_acked_max_us (100 ms). + * Max filter is an approximate sliding window of 5-10 (packet timed) round + * trips for non-startup phase, and 1-2 round trips for startup. + */ +static void bbr_update_ack_aggregation(struct sock *sk, + const struct rate_sample *rs) +{ + u32 epoch_us, expected_acked, extra_acked; + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 extra_acked_win_rtts_thresh = bbr->params.extra_acked_win_rtts; + + if (!bbr->params.extra_acked_gain || rs->acked_sacked <= 0 || + rs->delivered < 0 || rs->interval_us <= 0) + return; + + if (bbr->round_start) { + bbr->extra_acked_win_rtts = min(0x1F, + bbr->extra_acked_win_rtts + 1); + if (bbr->params.extra_acked_in_startup && + !bbr_full_bw_reached(sk)) + extra_acked_win_rtts_thresh = 1; + if (bbr->extra_acked_win_rtts >= + extra_acked_win_rtts_thresh) { + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = bbr->extra_acked_win_idx ? + 0 : 1; + bbr->extra_acked[bbr->extra_acked_win_idx] = 0; + } + } + + /* Compute how many packets we expected to be delivered over epoch. */ + epoch_us = tcp_stamp_us_delta(tp->delivered_mstamp, + bbr->ack_epoch_mstamp); + expected_acked = ((u64)bbr_bw(sk) * epoch_us) / BW_UNIT; + + /* Reset the aggregation epoch if ACK rate is below expected rate or + * significantly large no. of ack received since epoch (potentially + * quite old epoch). + */ + if (bbr->ack_epoch_acked <= expected_acked || + (bbr->ack_epoch_acked + rs->acked_sacked >= + bbr_ack_epoch_acked_reset_thresh)) { + bbr->ack_epoch_acked = 0; + bbr->ack_epoch_mstamp = tp->delivered_mstamp; + expected_acked = 0; + } + + /* Compute excess data delivered, beyond what was expected. */ + bbr->ack_epoch_acked = min_t(u32, 0xFFFFF, + bbr->ack_epoch_acked + rs->acked_sacked); + extra_acked = bbr->ack_epoch_acked - expected_acked; + extra_acked = min(extra_acked, tp->snd_cwnd); + if (extra_acked > bbr->extra_acked[bbr->extra_acked_win_idx]) + bbr->extra_acked[bbr->extra_acked_win_idx] = extra_acked; +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr->params.full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; + bbr->full_bw_reached = bbr->full_bw_cnt >= bbr->params.full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static bool bbr_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + tcp_sk(sk)->snd_ssthresh = + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); + bbr2_reset_congestion_signals(sk); + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + bbr_packets_in_net_at_edt(sk, tcp_packets_in_flight(tcp_sk(sk))) <= + bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT)) + return true; /* exiting DRAIN now */ + return false; +} + +static void bbr_check_probe_rtt_done(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (!(bbr->probe_rtt_done_stamp && + after(tcp_jiffies32, bbr->probe_rtt_done_stamp))) + return; + + bbr->probe_rtt_min_stamp = tcp_jiffies32; /* schedule next PROBE_RTT */ + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr2_exit_probe_rtt(sk); +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool probe_rtt_expired, min_rtt_expired; + u32 expire; + + /* Track min RTT in probe_rtt_win_ms to time next PROBE_RTT state. */ + expire = bbr->probe_rtt_min_stamp + + msecs_to_jiffies(bbr->params.probe_rtt_win_ms); + probe_rtt_expired = after(tcp_jiffies32, expire); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->probe_rtt_min_us || + (probe_rtt_expired && !rs->is_ack_delayed))) { + bbr->probe_rtt_min_us = rs->rtt_us; + bbr->probe_rtt_min_stamp = tcp_jiffies32; + } + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + expire = bbr->min_rtt_stamp + bbr->params.min_rtt_win_sec * HZ; + min_rtt_expired = after(tcp_jiffies32, expire); + if (bbr->probe_rtt_min_us <= bbr->min_rtt_us || + min_rtt_expired) { + bbr->min_rtt_us = bbr->probe_rtt_min_us; + bbr->min_rtt_stamp = bbr->probe_rtt_min_stamp; + } + + if (bbr->params.probe_rtt_mode_ms > 0 && probe_rtt_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). */ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_probe_rtt_cwnd(sk)) { + bbr->probe_rtt_done_stamp = tcp_jiffies32 + + msecs_to_jiffies(bbr->params.probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done) + bbr_check_probe_rtt_done(sk); + } + } + /* Restart after idle ends only once we process a new S/ACK for data */ + if (rs->delivered > 0) + bbr->idle_restart = 0; +} + +static void bbr_update_gains(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + switch (bbr->mode) { + case BBR_STARTUP: + bbr->pacing_gain = bbr->params.high_gain; + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; + break; + case BBR_DRAIN: + bbr->pacing_gain = bbr->params.drain_gain; /* slow, to drain */ + bbr->cwnd_gain = bbr->params.startup_cwnd_gain; /* keep cwnd */ + break; + case BBR_PROBE_BW: + bbr->pacing_gain = bbr->params.pacing_gain[bbr->cycle_idx]; + bbr->cwnd_gain = bbr->params.cwnd_gain; + break; + case BBR_PROBE_RTT: + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + break; + default: + WARN_ONCE(1, "BBR bad mode: %u\n", bbr->mode); + break; + } +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + int i; + + WARN_ON_ONCE(tp->snd_cwnd >= bbr_cwnd_warn_val); + + bbr->initialized = 1; + bbr->params.high_gain = min(0x7FF, bbr_high_gain); + bbr->params.drain_gain = min(0x3FF, bbr_drain_gain); + bbr->params.startup_cwnd_gain = min(0x7FF, bbr_startup_cwnd_gain); + bbr->params.cwnd_gain = min(0x7FF, bbr_cwnd_gain); + bbr->params.cwnd_tso_budget = min(0x1U, bbr_cwnd_tso_budget); + bbr->params.cwnd_min_target = min(0xFU, bbr_cwnd_min_target); + bbr->params.min_rtt_win_sec = min(0x1FU, bbr_min_rtt_win_sec); + bbr->params.probe_rtt_mode_ms = min(0x1FFU, bbr_probe_rtt_mode_ms); + bbr->params.full_bw_cnt = min(0x7U, bbr_full_bw_cnt); + bbr->params.full_bw_thresh = min(0x3FFU, bbr_full_bw_thresh); + bbr->params.extra_acked_gain = min(0x7FF, bbr_extra_acked_gain); + bbr->params.extra_acked_win_rtts = min(0x1FU, bbr_extra_acked_win_rtts); + bbr->params.drain_to_target = bbr_drain_to_target ? 1 : 0; + bbr->params.precise_ece_ack = bbr_precise_ece_ack ? 1 : 0; + bbr->params.extra_acked_in_startup = bbr_extra_acked_in_startup ? 1 : 0; + bbr->params.probe_rtt_cwnd_gain = min(0xFFU, bbr_probe_rtt_cwnd_gain); + bbr->params.probe_rtt_win_ms = + min(0x3FFFU, + min_t(u32, bbr_probe_rtt_win_ms, + bbr->params.min_rtt_win_sec * MSEC_PER_SEC)); + for (i = 0; i < CYCLE_LEN; i++) + bbr->params.pacing_gain[i] = min(0x3FF, bbr_pacing_gain[i]); + bbr->params.usage_based_cwnd = bbr_usage_based_cwnd ? 1 : 0; + bbr->params.tso_rtt_shift = min(0xFU, bbr_tso_rtt_shift); + + bbr->debug.snd_isn = tp->snd_una; + bbr->debug.target_cwnd = 0; + bbr->debug.undo = 0; + + bbr->init_cwnd = min(0x7FU, tp->snd_cwnd); + bbr->prior_cwnd = tp->prior_cwnd; + tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->probe_rtt_min_us = tcp_min_rtt(tp); + bbr->probe_rtt_min_stamp = tcp_jiffies32; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_jiffies32; + + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); + + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw_reached = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp = 0; + bbr->cycle_idx = 0; + bbr->mode = BBR_STARTUP; + bbr->debug.rs_bw = 0; + + bbr->ack_epoch_mstamp = tp->tcp_mstamp; + bbr->ack_epoch_acked = 0; + bbr->extra_acked_win_rtts = 0; + bbr->extra_acked_win_idx = 0; + bbr->extra_acked[0] = 0; + bbr->extra_acked[1] = 0; + + bbr->ce_state = 0; + bbr->prior_rcv_nxt = tp->rcv_nxt; + bbr->try_fast_path = 0; + + cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* __________________________________________________________________________ + * + * Functions new to BBR v2 ("bbr") congestion control are below here. + * __________________________________________________________________________ + */ + +/* Incorporate a new bw sample into the current window of our max filter. */ +static void bbr2_take_bw_hi_sample(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_hi[1] = max(bw, bbr->bw_hi[1]); +} + +/* Keep max of last 1-2 cycles. Each PROBE_BW cycle, flip filter window. */ +static void bbr2_advance_bw_hi_filter(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (!bbr->bw_hi[1]) + return; /* no samples in this window; remember old window */ + bbr->bw_hi[0] = bbr->bw_hi[1]; + bbr->bw_hi[1] = 0; +} + +/* How much do we want in flight? Our BDP, unless congestion cut cwnd. */ +static u32 bbr2_target_inflight(struct sock *sk) +{ + u32 bdp = bbr_inflight(sk, bbr_bw(sk), BBR_UNIT); + + return min(bdp, tcp_sk(sk)->snd_cwnd); +} + +static bool bbr2_is_probing_bandwidth(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return (bbr->mode == BBR_STARTUP) || + (bbr->mode == BBR_PROBE_BW && + (bbr->cycle_idx == BBR_BW_PROBE_REFILL || + bbr->cycle_idx == BBR_BW_PROBE_UP)); +} + +/* Has the given amount of time elapsed since we marked the phase start? */ +static bool bbr2_has_elapsed_in_phase(const struct sock *sk, u32 interval_us) +{ + const struct tcp_sock *tp = tcp_sk(sk); + const struct bbr *bbr = inet_csk_ca(sk); + + return tcp_stamp_us_delta(tp->tcp_mstamp, + bbr->cycle_mstamp + interval_us) > 0; +} + +static void bbr2_handle_queue_too_high_in_startup(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->full_bw_reached = 1; + bbr->inflight_hi = bbr_inflight(sk, bbr_max_bw(sk), BBR_UNIT); +} + +/* Exit STARTUP upon N consecutive rounds with ECN mark rate > ecn_thresh. */ +static void bbr2_check_ecn_too_high_in_startup(struct sock *sk, u32 ce_ratio) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk) || !bbr->ecn_eligible || + !bbr->params.full_ecn_cnt || !bbr->params.ecn_thresh) + return; + + if (ce_ratio >= bbr->params.ecn_thresh) + bbr->startup_ecn_rounds++; + else + bbr->startup_ecn_rounds = 0; + + if (bbr->startup_ecn_rounds >= bbr->params.full_ecn_cnt) { + bbr->debug.event = 'E'; /* ECN caused STARTUP exit */ + bbr2_handle_queue_too_high_in_startup(sk); + return; + } +} + +static void bbr2_update_ecn_alpha(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + s32 delivered, delivered_ce; + u64 alpha, ce_ratio; + u32 gain; + + if (bbr->params.ecn_factor == 0) + return; + + delivered = tp->delivered - bbr->alpha_last_delivered; + delivered_ce = tp->delivered_ce - bbr->alpha_last_delivered_ce; + + if (delivered == 0 || /* avoid divide by zero */ + WARN_ON_ONCE(delivered < 0 || delivered_ce < 0)) /* backwards? */ + return; + + /* See if we should use ECN sender logic for this connection. */ + if (!bbr->ecn_eligible && bbr_ecn_enable && + (bbr->min_rtt_us <= bbr->params.ecn_max_rtt_us || + !bbr->params.ecn_max_rtt_us)) + bbr->ecn_eligible = 1; + + ce_ratio = (u64)delivered_ce << BBR_SCALE; + do_div(ce_ratio, delivered); + gain = bbr->params.ecn_alpha_gain; + alpha = ((BBR_UNIT - gain) * bbr->ecn_alpha) >> BBR_SCALE; + alpha += (gain * ce_ratio) >> BBR_SCALE; + bbr->ecn_alpha = min_t(u32, alpha, BBR_UNIT); + + bbr->alpha_last_delivered = tp->delivered; + bbr->alpha_last_delivered_ce = tp->delivered_ce; + + bbr2_check_ecn_too_high_in_startup(sk, ce_ratio); +} + +/* Each round trip of BBR_BW_PROBE_UP, double volume of probing data. */ +static void bbr2_raise_inflight_hi_slope(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 growth_this_round, cnt; + + /* Calculate "slope": packets S/Acked per inflight_hi increment. */ + growth_this_round = 1 << bbr->bw_probe_up_rounds; + bbr->bw_probe_up_rounds = min(bbr->bw_probe_up_rounds + 1, 30); + cnt = tp->snd_cwnd / growth_this_round; + cnt = max(cnt, 1U); + bbr->bw_probe_up_cnt = cnt; + bbr->debug.event = 'G'; /* Grow inflight_hi slope */ +} + +/* In BBR_BW_PROBE_UP, not seeing high loss/ECN/queue, so raise inflight_hi. */ +static void bbr2_probe_inflight_hi_upward(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 delta; + + if (!tp->is_cwnd_limited || tp->snd_cwnd < bbr->inflight_hi) { + bbr->bw_probe_up_acks = 0; /* don't accmulate unused credits */ + return; /* not fully using inflight_hi, so don't grow it */ + } + + /* For each bw_probe_up_cnt packets ACKed, increase inflight_hi by 1. */ + bbr->bw_probe_up_acks += rs->acked_sacked; + if (bbr->bw_probe_up_acks >= bbr->bw_probe_up_cnt) { + delta = bbr->bw_probe_up_acks / bbr->bw_probe_up_cnt; + bbr->bw_probe_up_acks -= delta * bbr->bw_probe_up_cnt; + bbr->inflight_hi += delta; + bbr->debug.event = 'I'; /* Increment inflight_hi */ + } + + if (bbr->round_start) + bbr2_raise_inflight_hi_slope(sk); +} + +/* Does loss/ECN rate for this sample say inflight is "too high"? + * This is used by both the bbr_check_loss_too_high_in_startup() function, + * which can be used in either v1 or v2, and the PROBE_UP phase of v2, which + * uses it to notice when loss/ECN rates suggest inflight is too high. + */ +static bool bbr2_is_inflight_too_high(const struct sock *sk, + const struct rate_sample *rs) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh, ecn_thresh; + + if (rs->lost > 0 && rs->tx_in_flight) { + loss_thresh = (u64)rs->tx_in_flight * bbr->params.loss_thresh >> + BBR_SCALE; + if (rs->lost > loss_thresh) + return true; + } + + if (rs->delivered_ce > 0 && rs->delivered > 0 && + bbr->ecn_eligible && bbr->params.ecn_thresh) { + ecn_thresh = (u64)rs->delivered * bbr->params.ecn_thresh >> + BBR_SCALE; + if (rs->delivered_ce >= ecn_thresh) + return true; + } + + return false; +} + +/* Calculate the tx_in_flight level that corresponded to excessive loss. + * We find "lost_prefix" segs of the skb where loss rate went too high, + * by solving for "lost_prefix" in the following equation: + * lost / inflight >= loss_thresh + * (lost_prev + lost_prefix) / (inflight_prev + lost_prefix) >= loss_thresh + * Then we take that equation, convert it to fixed point, and + * round up to the nearest packet. + */ +static u32 bbr2_inflight_hi_from_lost_skb(const struct sock *sk, + const struct rate_sample *rs, + const struct sk_buff *skb) +{ + const struct bbr *bbr = inet_csk_ca(sk); + u32 loss_thresh = bbr->params.loss_thresh; + u32 pcount, divisor, inflight_hi; + s32 inflight_prev, lost_prev; + u64 loss_budget, lost_prefix; + + pcount = tcp_skb_pcount(skb); + + /* How much data was in flight before this skb? */ + inflight_prev = rs->tx_in_flight - pcount; + if (WARN_ONCE(inflight_prev < 0, + "tx_in_flight: %u pcount: %u reneg: %u", + rs->tx_in_flight, pcount, tcp_sk(sk)->is_sack_reneg)) + return ~0U; + + /* How much inflight data was marked lost before this skb? */ + lost_prev = rs->lost - pcount; + if (WARN_ON_ONCE(lost_prev < 0)) + return ~0U; + + /* At what prefix of this lost skb did losss rate exceed loss_thresh? */ + loss_budget = (u64)inflight_prev * loss_thresh + BBR_UNIT - 1; + loss_budget >>= BBR_SCALE; + if (lost_prev >= loss_budget) { + lost_prefix = 0; /* previous losses crossed loss_thresh */ + } else { + lost_prefix = loss_budget - lost_prev; + lost_prefix <<= BBR_SCALE; + divisor = BBR_UNIT - loss_thresh; + if (WARN_ON_ONCE(!divisor)) /* loss_thresh is 8 bits */ + return ~0U; + do_div(lost_prefix, divisor); + } + + inflight_hi = inflight_prev + lost_prefix; + return inflight_hi; +} + +/* If loss/ECN rates during probing indicated we may have overfilled a + * buffer, return an operating point that tries to leave unutilized headroom in + * the path for other flows, for fairness convergence and lower RTTs and loss. + */ +static u32 bbr2_inflight_with_headroom(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 headroom, headroom_fraction; + + if (bbr->inflight_hi == ~0U) + return ~0U; + + headroom_fraction = bbr->params.inflight_headroom; + headroom = ((u64)bbr->inflight_hi * headroom_fraction) >> BBR_SCALE; + headroom = max(headroom, 1U); + return max_t(s32, bbr->inflight_hi - headroom, + bbr->params.cwnd_min_target); +} + +/* Bound cwnd to a sensible level, based on our current probing state + * machine phase and model of a good inflight level (inflight_lo, inflight_hi). + */ +static void bbr2_bound_cwnd_for_inflight_model(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cap; + + /* tcp_rcv_synsent_state_process() currently calls tcp_ack() + * and thus cong_control() without first initializing us(!). + */ + if (!bbr->initialized) + return; + + cap = ~0U; + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx != BBR_BW_PROBE_CRUISE) { + /* Probe to see if more packets fit in the path. */ + cap = bbr->inflight_hi; + } else { + if (bbr->mode == BBR_PROBE_RTT || + (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_CRUISE)) + cap = bbr2_inflight_with_headroom(sk); + } + /* Adapt to any loss/ECN since our last bw probe. */ + cap = min(cap, bbr->inflight_lo); + + cap = max_t(u32, cap, bbr->params.cwnd_min_target); + tp->snd_cwnd = min(cap, tp->snd_cwnd); +} + +/* Estimate a short-term lower bound on the capacity available now, based + * on measurements of the current delivery process and recent history. When we + * are seeing loss/ECN at times when we are not probing bw, then conservatively + * move toward flow balance by multiplicatively cutting our short-term + * estimated safe rate and volume of data (bw_lo and inflight_lo). We use a + * multiplicative decrease in order to converge to a lower capacity in time + * logarithmic in the magnitude of the decrease. + * + * However, we do not cut our short-term estimates lower than the current rate + * and volume of delivered data from this round trip, since from the current + * delivery process we can estimate the measured capacity available now. + * + * Anything faster than that approach would knowingly risk high loss, which can + * cause low bw for Reno/CUBIC and high loss recovery latency for + * request/response flows using any congestion control. + */ +static void bbr2_adapt_lower_bounds(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 ecn_cut, ecn_inflight_lo, beta; + + /* We only use lower-bound estimates when not probing bw. + * When probing we need to push inflight higher to probe bw. + */ + if (bbr2_is_probing_bandwidth(sk)) + return; + + /* ECN response. */ + if (bbr->ecn_in_round && bbr->ecn_eligible && bbr->params.ecn_factor) { + /* Reduce inflight to (1 - alpha*ecn_factor). */ + ecn_cut = (BBR_UNIT - + ((bbr->ecn_alpha * bbr->params.ecn_factor) >> + BBR_SCALE)); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tp->snd_cwnd; + ecn_inflight_lo = (u64)bbr->inflight_lo * ecn_cut >> BBR_SCALE; + } else { + ecn_inflight_lo = ~0U; + } + + /* Loss response. */ + if (bbr->loss_in_round) { + /* Reduce bw and inflight to (1 - beta). */ + if (bbr->bw_lo == ~0U) + bbr->bw_lo = bbr_max_bw(sk); + if (bbr->inflight_lo == ~0U) + bbr->inflight_lo = tp->snd_cwnd; + beta = bbr->params.beta; + bbr->bw_lo = + max_t(u32, bbr->bw_latest, + (u64)bbr->bw_lo * + (BBR_UNIT - beta) >> BBR_SCALE); + bbr->inflight_lo = + max_t(u32, bbr->inflight_latest, + (u64)bbr->inflight_lo * + (BBR_UNIT - beta) >> BBR_SCALE); + } + + /* Adjust to the lower of the levels implied by loss or ECN. */ + bbr->inflight_lo = min(bbr->inflight_lo, ecn_inflight_lo); +} + +/* Reset any short-term lower-bound adaptation to congestion, so that we can + * push our inflight up. + */ +static void bbr2_reset_lower_bounds(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->bw_lo = ~0U; + bbr->inflight_lo = ~0U; +} + +/* After bw probing (STARTUP/PROBE_UP), reset signals before entering a state + * machine phase where we adapt our lower bound based on congestion signals. + */ +static void bbr2_reset_congestion_signals(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->loss_in_cycle = 0; + bbr->ecn_in_cycle = 0; + bbr->bw_latest = 0; + bbr->inflight_latest = 0; +} + +/* Update (most of) our congestion signals: track the recent rate and volume of + * delivered data, presence of loss, and EWMA degree of ECN marking. + */ +static void bbr2_update_congestion_signals( + struct sock *sk, const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->loss_round_start = 0; + if (rs->interval_us <= 0 || !rs->acked_sacked) + return; /* Not a valid observation */ + bw = ctx->sample_bw; + + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) + bbr2_take_bw_hi_sample(sk, bw); + + bbr->loss_in_round |= (rs->losses > 0); + + /* Update rate and volume of delivered data from latest round trip: */ + bbr->bw_latest = max_t(u32, bbr->bw_latest, ctx->sample_bw); + bbr->inflight_latest = max_t(u32, bbr->inflight_latest, rs->delivered); + + if (before(rs->prior_delivered, bbr->loss_round_delivered)) + return; /* skip the per-round-trip updates */ + /* Now do per-round-trip updates. */ + bbr->loss_round_delivered = tp->delivered; /* mark round trip */ + bbr->loss_round_start = 1; + bbr2_adapt_lower_bounds(sk); + + /* Update windowed "latest" (single-round-trip) filters. */ + bbr->loss_in_round = 0; + bbr->ecn_in_round = 0; + bbr->bw_latest = ctx->sample_bw; + bbr->inflight_latest = rs->delivered; +} + +/* Bandwidth probing can cause loss. To help coexistence with loss-based + * congestion control we spread out our probing in a Reno-conscious way. Due to + * the shape of the Reno sawtooth, the time required between loss epochs for an + * idealized Reno flow is a number of round trips that is the BDP of that + * flow. We count packet-timed round trips directly, since measured RTT can + * vary widely, and Reno is driven by packet-timed round trips. + */ +static bool bbr2_is_reno_coexistence_probe_time(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 inflight, rounds, reno_gain, reno_rounds; + + /* Random loss can shave some small percentage off of our inflight + * in each round. To survive this, flows need robust periodic probes. + */ + rounds = bbr->params.bw_probe_max_rounds; + + reno_gain = bbr->params.bw_probe_reno_gain; + if (reno_gain) { + inflight = bbr2_target_inflight(sk); + reno_rounds = ((u64)inflight * reno_gain) >> BBR_SCALE; + rounds = min(rounds, reno_rounds); + } + return bbr->rounds_since_probe >= rounds; +} + +/* How long do we want to wait before probing for bandwidth (and risking + * loss)? We randomize the wait, for better mixing and fairness convergence. + * + * We bound the Reno-coexistence inter-bw-probe time to be 62-63 round trips. + * This is calculated to allow fairness with a 25Mbps, 30ms Reno flow, + * (eg 4K video to a broadband user): + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + * + * We bound the BBR-native inter-bw-probe wall clock time to be: + * (a) higher than 2 sec: to try to avoid causing loss for a long enough time + * to allow Reno at 30ms to get 4K video bw, the inter-bw-probe time must + * be at least: 25Mbps * .030sec / (1514bytes) * 0.030sec = 1.9secs + * (b) lower than 3 sec: to ensure flows can start probing in a reasonable + * amount of time to discover unutilized bw on human-scale interactive + * time-scales (e.g. perhaps traffic from a web page download that we + * were competing with is now complete). + */ +static void bbr2_pick_probe_wait(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Decide the random round-trip bound for wait until probe: */ + bbr->rounds_since_probe = + prandom_u32_max(bbr->params.bw_probe_rand_rounds); + /* Decide the random wall clock bound for wait until probe: */ + bbr->probe_wait_us = bbr->params.bw_probe_base_us + + prandom_u32_max(bbr->params.bw_probe_rand_us); +} + +static void bbr2_set_cycle_idx(struct sock *sk, int cycle_idx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = cycle_idx; + /* New phase, so need to update cwnd and pacing rate. */ + bbr->try_fast_path = 0; +} + +/* Send at estimated bw to fill the pipe, but not queue. We need this phase + * before PROBE_UP, because as soon as we send faster than the available bw + * we will start building a queue, and if the buffer is shallow we can cause + * loss. If we do not fill the pipe before we cause this loss, our bw_hi and + * inflight_hi estimates will underestimate. + */ +static void bbr2_start_bw_probe_refill(struct sock *sk, u32 bw_probe_up_rounds) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr2_reset_lower_bounds(sk); + if (bbr->inflight_hi != ~0U) + bbr->inflight_hi += bbr->params.refill_add_inc; + bbr->bw_probe_up_rounds = bw_probe_up_rounds; + bbr->bw_probe_up_acks = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_REFILLING; + bbr->next_rtt_delivered = tp->delivered; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_REFILL); +} + +/* Now probe max deliverable data rate and volume. */ +static void bbr2_start_bw_probe_up(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->ack_phase = BBR_ACKS_PROBE_STARTING; + bbr->next_rtt_delivered = tp->delivered; + bbr->cycle_mstamp = tp->tcp_mstamp; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_UP); + bbr2_raise_inflight_hi_slope(sk); +} + +/* Start a new PROBE_BW probing cycle of some wall clock length. Pick a wall + * clock time at which to probe beyond an inflight that we think to be + * safe. This will knowingly risk packet loss, so we want to do this rarely, to + * keep packet loss rates low. Also start a round-trip counter, to probe faster + * if we estimate a Reno flow at our BDP would probe faster. + */ +static void bbr2_start_bw_probe_down(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr2_reset_congestion_signals(sk); + bbr->bw_probe_up_cnt = ~0U; /* not growing inflight_hi any more */ + bbr2_pick_probe_wait(sk); + bbr->cycle_mstamp = tp->tcp_mstamp; /* start wall clock */ + bbr->ack_phase = BBR_ACKS_PROBE_STOPPING; + bbr->next_rtt_delivered = tp->delivered; + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_DOWN); +} + +/* Cruise: maintain what we estimate to be a neutral, conservative + * operating point, without attempting to probe up for bandwidth or down for + * RTT, and only reducing inflight in response to loss/ECN signals. + */ +static void bbr2_start_bw_probe_cruise(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->inflight_lo != ~0U) + bbr->inflight_lo = min(bbr->inflight_lo, bbr->inflight_hi); + + bbr2_set_cycle_idx(sk, BBR_BW_PROBE_CRUISE); +} + +/* Loss and/or ECN rate is too high while probing. + * Adapt (once per bw probe) by cutting inflight_hi and then restarting cycle. + */ +static void bbr2_handle_inflight_too_high(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + const u32 beta = bbr->params.beta; + + bbr->prev_probe_too_high = 1; + bbr->bw_probe_samples = 0; /* only react once per probe */ + bbr->debug.event = 'L'; /* Loss/ECN too high */ + /* If we are app-limited then we are not robustly + * probing the max volume of inflight data we think + * might be safe (analogous to how app-limited bw + * samples are not known to be robustly probing bw). + */ + if (!rs->is_app_limited) + bbr->inflight_hi = max_t(u32, rs->tx_in_flight, + (u64)bbr2_target_inflight(sk) * + (BBR_UNIT - beta) >> BBR_SCALE); + if (bbr->mode == BBR_PROBE_BW && bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr2_start_bw_probe_down(sk); +} + +/* If we're seeing bw and loss samples reflecting our bw probing, adapt + * using the signals we see. If loss or ECN mark rate gets too high, then adapt + * inflight_hi downward. If we're able to push inflight higher without such + * signals, push higher: adapt inflight_hi upward. + */ +static bool bbr2_adapt_upper_bounds(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + /* Track when we'll see bw/loss samples resulting from our bw probes. */ + if (bbr->ack_phase == BBR_ACKS_PROBE_STARTING && bbr->round_start) + bbr->ack_phase = BBR_ACKS_PROBE_FEEDBACK; + if (bbr->ack_phase == BBR_ACKS_PROBE_STOPPING && bbr->round_start) { + /* End of samples from bw probing phase. */ + bbr->bw_probe_samples = 0; + bbr->ack_phase = BBR_ACKS_INIT; + /* At this point in the cycle, our current bw sample is also + * our best recent chance at finding the highest available bw + * for this flow. So now is the best time to forget the bw + * samples from the previous cycle, by advancing the window. + */ + if (bbr->mode == BBR_PROBE_BW && !rs->is_app_limited) + bbr2_advance_bw_hi_filter(sk); + /* If we had an inflight_hi, then probed and pushed inflight all + * the way up to hit that inflight_hi without seeing any + * high loss/ECN in all the resulting ACKs from that probing, + * then probe up again, this time letting inflight persist at + * inflight_hi for a round trip, then accelerating beyond. + */ + if (bbr->mode == BBR_PROBE_BW && + bbr->stopped_risky_probe && !bbr->prev_probe_too_high) { + bbr->debug.event = 'R'; /* reprobe */ + bbr2_start_bw_probe_refill(sk, 0); + return true; /* yes, decided state transition */ + } + } + + if (bbr2_is_inflight_too_high(sk, rs)) { + if (bbr->bw_probe_samples) /* sample is from bw probing? */ + bbr2_handle_inflight_too_high(sk, rs); + } else { + /* Loss/ECN rate is declared safe. Adjust upper bound upward. */ + if (bbr->inflight_hi == ~0U) /* no excess queue signals yet? */ + return false; + + /* To be resilient to random loss, we must raise inflight_hi + * if we observe in any phase that a higher level is safe. + */ + if (rs->tx_in_flight > bbr->inflight_hi) { + bbr->inflight_hi = rs->tx_in_flight; + bbr->debug.event = 'U'; /* raise up inflight_hi */ + } + + if (bbr->mode == BBR_PROBE_BW && + bbr->cycle_idx == BBR_BW_PROBE_UP) + bbr2_probe_inflight_hi_upward(sk, rs); + } + + return false; +} + +/* Check if it's time to probe for bandwidth now, and if so, kick it off. */ +static bool bbr2_check_time_to_probe_bw(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 n; + + /* If we seem to be at an operating point where we are not seeing loss + * but we are seeing ECN marks, then when the ECN marks cease we reprobe + * quickly (in case a burst of cross-traffic has ceased and freed up bw, + * or in case we are sharing with multiplicatively probing traffic). + */ + if (bbr->params.ecn_reprobe_gain && bbr->ecn_eligible && + bbr->ecn_in_cycle && !bbr->loss_in_cycle && + inet_csk(sk)->icsk_ca_state == TCP_CA_Open) { + bbr->debug.event = 'A'; /* *A*ll clear to probe *A*gain */ + /* Calculate n so that when bbr2_raise_inflight_hi_slope() + * computes growth_this_round as 2^n it will be roughly the + * desired volume of data (inflight_hi*ecn_reprobe_gain). + */ + n = ilog2((((u64)bbr->inflight_hi * + bbr->params.ecn_reprobe_gain) >> BBR_SCALE)); + bbr2_start_bw_probe_refill(sk, n); + return true; + } + + if (bbr2_has_elapsed_in_phase(sk, bbr->probe_wait_us) || + bbr2_is_reno_coexistence_probe_time(sk)) { + bbr2_start_bw_probe_refill(sk, 0); + return true; + } + return false; +} + +/* Is it time to transition from PROBE_DOWN to PROBE_CRUISE? */ +static bool bbr2_check_time_to_cruise(struct sock *sk, u32 inflight, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + bool is_under_bdp, is_long_enough; + + /* Always need to pull inflight down to leave headroom in queue. */ + if (inflight > bbr2_inflight_with_headroom(sk)) + return false; + + is_under_bdp = inflight <= bbr_inflight(sk, bw, BBR_UNIT); + if (bbr->params.drain_to_target) + return is_under_bdp; + + is_long_enough = bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us); + return is_under_bdp || is_long_enough; +} + +/* PROBE_BW state machine: cruise, refill, probe for bw, or drain? */ +static void bbr2_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + bool is_risky = false, is_queuing = false; + u32 inflight, bw; + + if (!bbr_full_bw_reached(sk)) + return; + + /* In DRAIN, PROBE_BW, or PROBE_RTT, adjust upper bounds. */ + if (bbr2_adapt_upper_bounds(sk, rs)) + return; /* already decided state transition */ + + if (bbr->mode != BBR_PROBE_BW) + return; + + inflight = bbr_packets_in_net_at_edt(sk, rs->prior_in_flight); + bw = bbr_max_bw(sk); + + switch (bbr->cycle_idx) { + /* First we spend most of our time cruising with a pacing_gain of 1.0, + * which paces at the estimated bw, to try to fully use the pipe + * without building queue. If we encounter loss/ECN marks, we adapt + * by slowing down. + */ + case BBR_BW_PROBE_CRUISE: + if (bbr2_check_time_to_probe_bw(sk)) + return; /* already decided state transition */ + break; + + /* After cruising, when it's time to probe, we first "refill": we send + * at the estimated bw to fill the pipe, before probing higher and + * knowingly risking overflowing the bottleneck buffer (causing loss). + */ + case BBR_BW_PROBE_REFILL: + if (bbr->round_start) { + /* After one full round trip of sending in REFILL, we + * start to see bw samples reflecting our REFILL, which + * may be putting too much data in flight. + */ + bbr->bw_probe_samples = 1; + bbr2_start_bw_probe_up(sk); + } + break; + + /* After we refill the pipe, we probe by using a pacing_gain > 1.0, to + * probe for bw. If we have not seen loss/ECN, we try to raise inflight + * to at least pacing_gain*BDP; note that this may take more than + * min_rtt if min_rtt is small (e.g. on a LAN). + * + * We terminate PROBE_UP bandwidth probing upon any of the following: + * + * (1) We've pushed inflight up to hit the inflight_hi target set in the + * most recent previous bw probe phase. Thus we want to start + * draining the queue immediately because it's very likely the most + * recently sent packets will fill the queue and cause drops. + * (checked here) + * (2) We have probed for at least 1*min_rtt_us, and the + * estimated queue is high enough (inflight > 1.25 * estimated_bdp). + * (checked here) + * (3) Loss filter says loss rate is "too high". + * (checked in bbr_is_inflight_too_high()) + * (4) ECN filter says ECN mark rate is "too high". + * (checked in bbr_is_inflight_too_high()) + */ + case BBR_BW_PROBE_UP: + if (bbr->prev_probe_too_high && + inflight >= bbr->inflight_hi) { + bbr->stopped_risky_probe = 1; + is_risky = true; + bbr->debug.event = 'D'; /* D for danger */ + } else if (bbr2_has_elapsed_in_phase(sk, bbr->min_rtt_us) && + inflight >= + bbr_inflight(sk, bw, + bbr->params.bw_probe_pif_gain)) { + is_queuing = true; + bbr->debug.event = 'Q'; /* building Queue */ + } + if (is_risky || is_queuing) { + bbr->prev_probe_too_high = 0; /* no loss/ECN (yet) */ + bbr2_start_bw_probe_down(sk); /* restart w/ down */ + } + break; + + /* After probing in PROBE_UP, we have usually accumulated some data in + * the bottleneck buffer (if bw probing didn't find more bw). We next + * enter PROBE_DOWN to try to drain any excess data from the queue. To + * do this, we use a pacing_gain < 1.0. We hold this pacing gain until + * our inflight is less then that target cruising point, which is the + * minimum of (a) the amount needed to leave headroom, and (b) the + * estimated BDP. Once inflight falls to match the target, we estimate + * the queue is drained; persisting would underutilize the pipe. + */ + case BBR_BW_PROBE_DOWN: + if (bbr2_check_time_to_probe_bw(sk)) + return; /* already decided state transition */ + if (bbr2_check_time_to_cruise(sk, inflight, bw)) + bbr2_start_bw_probe_cruise(sk); + break; + + default: + WARN_ONCE(1, "BBR invalid cycle index %u\n", bbr->cycle_idx); + } +} + +/* Exiting PROBE_RTT, so return to bandwidth probing in STARTUP or PROBE_BW. */ +static void bbr2_exit_probe_rtt(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr2_reset_lower_bounds(sk); + if (bbr_full_bw_reached(sk)) { + bbr->mode = BBR_PROBE_BW; + /* Raising inflight after PROBE_RTT may cause loss, so reset + * the PROBE_BW clock and schedule the next bandwidth probe for + * a friendly and randomized future point in time. + */ + bbr2_start_bw_probe_down(sk); + /* Since we are exiting PROBE_RTT, we know inflight is + * below our estimated BDP, so it is reasonable to cruise. + */ + bbr2_start_bw_probe_cruise(sk); + } else { + bbr->mode = BBR_STARTUP; + } +} + +/* Exit STARTUP based on loss rate > 1% and loss gaps in round >= N. Wait until + * the end of the round in recovery to get a good estimate of how many packets + * have been lost, and how many we need to drain with a low pacing rate. + */ +static void bbr2_check_loss_too_high_in_startup(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_full_bw_reached(sk)) + return; + + /* For STARTUP exit, check the loss rate at the end of each round trip + * of Recovery episodes in STARTUP. We check the loss rate at the end + * of the round trip to filter out noisy/low loss and have a better + * sense of inflight (extent of loss), so we can drain more accurately. + */ + if (rs->losses && bbr->loss_events_in_round < 0xf) + bbr->loss_events_in_round++; /* update saturating counter */ + if (bbr->params.full_loss_cnt && bbr->loss_round_start && + inet_csk(sk)->icsk_ca_state == TCP_CA_Recovery && + bbr->loss_events_in_round >= bbr->params.full_loss_cnt && + bbr2_is_inflight_too_high(sk, rs)) { + bbr->debug.event = 'P'; /* Packet loss caused STARTUP exit */ + bbr2_handle_queue_too_high_in_startup(sk); + return; + } + if (bbr->loss_round_start) + bbr->loss_events_in_round = 0; +} + +/* If we are done draining, advance into steady state operation in PROBE_BW. */ +static void bbr2_check_drain(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr_check_drain(sk, rs, ctx)) { + bbr->mode = BBR_PROBE_BW; + bbr2_start_bw_probe_down(sk); + } +} + +static void bbr2_update_model(struct sock *sk, const struct rate_sample *rs, + struct bbr_context *ctx) +{ + bbr2_update_congestion_signals(sk, rs, ctx); + bbr_update_ack_aggregation(sk, rs); + bbr2_check_loss_too_high_in_startup(sk, rs); + bbr_check_full_bw_reached(sk, rs); + bbr2_check_drain(sk, rs, ctx); + bbr2_update_cycle_phase(sk, rs); + bbr_update_min_rtt(sk, rs); +} + +/* Fast path for app-limited case. + * + * On each ack, we execute bbr state machine, which primarily consists of: + * 1) update model based on new rate sample, and + * 2) update control based on updated model or state change. + * + * There are certain workload/scenarios, e.g. app-limited case, where + * either we can skip updating model or we can skip update of both model + * as well as control. This provides signifcant softirq cpu savings for + * processing incoming acks. + * + * In case of app-limited, if there is no congestion (loss/ecn) and + * if observed bw sample is less than current estimated bw, then we can + * skip some of the computation in bbr state processing: + * + * - if there is no rtt/mode/phase change: In this case, since all the + * parameters of the network model are constant, we can skip model + * as well control update. + * + * - else we can skip rest of the model update. But we still need to + * update the control to account for the new rtt/mode/phase. + * + * Returns whether we can take fast path or not. + */ +static bool bbr2_fast_path(struct sock *sk, bool *update_model, + const struct rate_sample *rs, struct bbr_context *ctx) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 prev_min_rtt_us, prev_mode; + + if (bbr->params.fast_path && bbr->try_fast_path && + rs->is_app_limited && ctx->sample_bw < bbr_max_bw(sk) && + !bbr->loss_in_round && !bbr->ecn_in_round) { + prev_mode = bbr->mode; + prev_min_rtt_us = bbr->min_rtt_us; + bbr2_check_drain(sk, rs, ctx); + bbr2_update_cycle_phase(sk, rs); + bbr_update_min_rtt(sk, rs); + + if (bbr->mode == prev_mode && + bbr->min_rtt_us == prev_min_rtt_us && + bbr->try_fast_path) + return true; + + /* Skip model update, but control still needs to be updated */ + *update_model = false; + } + return false; +} + +static void bbr2_main(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct bbr_context ctx = { 0 }; + bool update_model = true; + u32 bw; + + bbr->debug.event = '.'; /* init to default NOP (no event yet) */ + + bbr_update_round_start(sk, rs, &ctx); + if (bbr->round_start) { + bbr->rounds_since_probe = + min_t(s32, bbr->rounds_since_probe + 1, 0xFF); + bbr2_update_ecn_alpha(sk); + } + + bbr->ecn_in_round |= rs->is_ece; + bbr_calculate_bw_sample(sk, rs, &ctx); + + if (bbr2_fast_path(sk, &update_model, rs, &ctx)) + goto out; + + if (update_model) + bbr2_update_model(sk, rs, &ctx); + + bbr_update_gains(sk); + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain, + tp->snd_cwnd, &ctx); + bbr2_bound_cwnd_for_inflight_model(sk); + +out: + bbr->prev_ca_state = inet_csk(sk)->icsk_ca_state; + bbr->loss_in_cycle |= rs->lost > 0; + bbr->ecn_in_cycle |= rs->delivered_ce > 0; + + bbr_debug(sk, rs->acked_sacked, rs, &ctx); +} + +/* Module parameters that are settable by TCP_CONGESTION_PARAMS are declared + * down here, so that the algorithm functions that use the parameters must use + * the per-socket parameters; if they accidentally use the global version + * then there will be a compile error. + * TODO(ncardwell): move all per-socket parameters down to this section. + */ + +/* On losses, scale down inflight and pacing rate by beta scaled by BBR_SCALE. + * No loss response when 0. Max allwed value is 255. + */ +static u32 bbr_beta = BBR_UNIT * 30 / 100; + +/* Gain factor for ECN mark ratio samples, scaled by BBR_SCALE. + * Max allowed value is 255. + */ +static u32 bbr_ecn_alpha_gain = BBR_UNIT * 1 / 16; /* 1/16 = 6.25% */ + +/* The initial value for the ecn_alpha state variable. Default and max + * BBR_UNIT (256), representing 1.0. This allows a flow to respond quickly + * to congestion if the bottleneck is congested when the flow starts up. + */ +static u32 bbr_ecn_alpha_init = BBR_UNIT; /* 1.0, to respond quickly */ + +/* On ECN, cut inflight_lo to (1 - ecn_factor * ecn_alpha) scaled by BBR_SCALE. + * No ECN based bounding when 0. Max allwed value is 255. + */ +static u32 bbr_ecn_factor = BBR_UNIT * 1 / 3; /* 1/3 = 33% */ + +/* Estimate bw probing has gone too far if CE ratio exceeds this threshold. + * Scaled by BBR_SCALE. Disabled when 0. Max allowed is 255. + */ +static u32 bbr_ecn_thresh = BBR_UNIT * 1 / 2; /* 1/2 = 50% */ + +/* Max RTT (in usec) at which to use sender-side ECN logic. + * Disabled when 0 (ECN allowed at any RTT). + * Max allowed for the parameter is 524287 (0x7ffff) us, ~524 ms. + */ +static u32 bbr_ecn_max_rtt_us = 5000; + +/* If non-zero, if in a cycle with no losses but some ECN marks, after ECN + * clears then use a multiplicative increase to quickly reprobe bw by + * starting inflight probing at the given multiple of inflight_hi. + * Default for this experimental knob is 0 (disabled). + * Planned value for experiments: BBR_UNIT * 1 / 2 = 128, representing 0.5. + */ +static u32 bbr_ecn_reprobe_gain; + +/* Estimate bw probing has gone too far if loss rate exceeds this level. */ +static u32 bbr_loss_thresh = BBR_UNIT * 2 / 100; /* 2% loss */ + +/* Exit STARTUP if number of loss marking events in a Recovery round is >= N, + * and loss rate is higher than bbr_loss_thresh. + * Disabled if 0. Max allowed value is 15 (0xF). + */ +static u32 bbr_full_loss_cnt = 8; + +/* Exit STARTUP if number of round trips with ECN mark rate above ecn_thresh + * meets this count. Max allowed value is 3. + */ +static u32 bbr_full_ecn_cnt = 2; + +/* Fraction of unutilized headroom to try to leave in path upon high loss. */ +static u32 bbr_inflight_headroom = BBR_UNIT * 15 / 100; + +/* Multiplier to get target inflight (as multiple of BDP) for PROBE_UP phase. + * Default is 1.25x, as in BBR v1. Max allowed is 511. + */ +static u32 bbr_bw_probe_pif_gain = BBR_UNIT * 5 / 4; + +/* Multiplier to get Reno-style probe epoch duration as: k * BDP round trips. + * If zero, disables this BBR v2 Reno-style BDP-scaled coexistence mechanism. + * Max allowed is 511. + */ +static u32 bbr_bw_probe_reno_gain = BBR_UNIT; + +/* Max number of packet-timed rounds to wait before probing for bandwidth. If + * we want to tolerate 1% random loss per round, and not have this cut our + * inflight too much, we must probe for bw periodically on roughly this scale. + * If low, limits Reno/CUBIC coexistence; if high, limits loss tolerance. + * We aim to be fair with Reno/CUBIC up to a BDP of at least: + * BDP = 25Mbps * .030sec /(1514bytes) = 61.9 packets + */ +static u32 bbr_bw_probe_max_rounds = 63; + +/* Max amount of randomness to inject in round counting for Reno-coexistence. + * Max value is 15. + */ +static u32 bbr_bw_probe_rand_rounds = 2; + +/* Use BBR-native probe time scale starting at this many usec. + * We aim to be fair with Reno/CUBIC up to an inter-loss time epoch of at least: + * BDP*RTT = 25Mbps * .030sec /(1514bytes) * 0.030sec = 1.9 secs + */ +static u32 bbr_bw_probe_base_us = 2 * USEC_PER_SEC; /* 2 secs */ + +/* Use BBR-native probes spread over this many usec: */ +static u32 bbr_bw_probe_rand_us = 1 * USEC_PER_SEC; /* 1 secs */ + +/* Undo the model changes made in loss recovery if recovery was spurious? */ +static bool bbr_undo = true; + +/* Use fast path if app-limited, no loss/ECN, and target cwnd was reached? */ +static bool bbr_fast_path = true; /* default: enabled */ + +/* Use fast ack mode ? */ +static int bbr_fast_ack_mode = 1; /* default: rwnd check off */ + +/* How much to additively increase inflight_hi when entering REFILL? */ +static u32 bbr_refill_add_inc; /* default: disabled */ + +module_param_named(beta, bbr_beta, uint, 0644); +module_param_named(ecn_alpha_gain, bbr_ecn_alpha_gain, uint, 0644); +module_param_named(ecn_alpha_init, bbr_ecn_alpha_init, uint, 0644); +module_param_named(ecn_factor, bbr_ecn_factor, uint, 0644); +module_param_named(ecn_thresh, bbr_ecn_thresh, uint, 0644); +module_param_named(ecn_max_rtt_us, bbr_ecn_max_rtt_us, uint, 0644); +module_param_named(ecn_reprobe_gain, bbr_ecn_reprobe_gain, uint, 0644); +module_param_named(loss_thresh, bbr_loss_thresh, uint, 0664); +module_param_named(full_loss_cnt, bbr_full_loss_cnt, uint, 0664); +module_param_named(full_ecn_cnt, bbr_full_ecn_cnt, uint, 0664); +module_param_named(inflight_headroom, bbr_inflight_headroom, uint, 0664); +module_param_named(bw_probe_pif_gain, bbr_bw_probe_pif_gain, uint, 0664); +module_param_named(bw_probe_reno_gain, bbr_bw_probe_reno_gain, uint, 0664); +module_param_named(bw_probe_max_rounds, bbr_bw_probe_max_rounds, uint, 0664); +module_param_named(bw_probe_rand_rounds, bbr_bw_probe_rand_rounds, uint, 0664); +module_param_named(bw_probe_base_us, bbr_bw_probe_base_us, uint, 0664); +module_param_named(bw_probe_rand_us, bbr_bw_probe_rand_us, uint, 0664); +module_param_named(undo, bbr_undo, bool, 0664); +module_param_named(fast_path, bbr_fast_path, bool, 0664); +module_param_named(fast_ack_mode, bbr_fast_ack_mode, uint, 0664); +module_param_named(refill_add_inc, bbr_refill_add_inc, uint, 0664); + +static void bbr2_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr_init(sk); /* run shared init code for v1 and v2 */ + + /* BBR v2 parameters: */ + bbr->params.beta = min_t(u32, 0xFFU, bbr_beta); + bbr->params.ecn_alpha_gain = min_t(u32, 0xFFU, bbr_ecn_alpha_gain); + bbr->params.ecn_alpha_init = min_t(u32, BBR_UNIT, bbr_ecn_alpha_init); + bbr->params.ecn_factor = min_t(u32, 0xFFU, bbr_ecn_factor); + bbr->params.ecn_thresh = min_t(u32, 0xFFU, bbr_ecn_thresh); + bbr->params.ecn_max_rtt_us = min_t(u32, 0x7ffffU, bbr_ecn_max_rtt_us); + bbr->params.ecn_reprobe_gain = min_t(u32, 0x1FF, bbr_ecn_reprobe_gain); + bbr->params.loss_thresh = min_t(u32, 0xFFU, bbr_loss_thresh); + bbr->params.full_loss_cnt = min_t(u32, 0xFU, bbr_full_loss_cnt); + bbr->params.full_ecn_cnt = min_t(u32, 0x3U, bbr_full_ecn_cnt); + bbr->params.inflight_headroom = + min_t(u32, 0xFFU, bbr_inflight_headroom); + bbr->params.bw_probe_pif_gain = + min_t(u32, 0x1FFU, bbr_bw_probe_pif_gain); + bbr->params.bw_probe_reno_gain = + min_t(u32, 0x1FFU, bbr_bw_probe_reno_gain); + bbr->params.bw_probe_max_rounds = + min_t(u32, 0xFFU, bbr_bw_probe_max_rounds); + bbr->params.bw_probe_rand_rounds = + min_t(u32, 0xFU, bbr_bw_probe_rand_rounds); + bbr->params.bw_probe_base_us = + min_t(u32, (1 << 26) - 1, bbr_bw_probe_base_us); + bbr->params.bw_probe_rand_us = + min_t(u32, (1 << 26) - 1, bbr_bw_probe_rand_us); + bbr->params.undo = bbr_undo; + bbr->params.fast_path = bbr_fast_path ? 1 : 0; + bbr->params.refill_add_inc = min_t(u32, 0x3U, bbr_refill_add_inc); + + /* BBR v2 state: */ + bbr->initialized = 1; + /* Start sampling ECN mark rate after first full flight is ACKed: */ + bbr->loss_round_delivered = tp->delivered + 1; + bbr->loss_round_start = 0; + bbr->undo_bw_lo = 0; + bbr->undo_inflight_lo = 0; + bbr->undo_inflight_hi = 0; + bbr->loss_events_in_round = 0; + bbr->startup_ecn_rounds = 0; + bbr2_reset_congestion_signals(sk); + bbr->bw_lo = ~0U; + bbr->bw_hi[0] = 0; + bbr->bw_hi[1] = 0; + bbr->inflight_lo = ~0U; + bbr->inflight_hi = ~0U; + bbr->bw_probe_up_cnt = ~0U; + bbr->bw_probe_up_acks = 0; + bbr->bw_probe_up_rounds = 0; + bbr->probe_wait_us = 0; + bbr->stopped_risky_probe = 0; + bbr->ack_phase = BBR_ACKS_INIT; + bbr->rounds_since_probe = 0; + bbr->bw_probe_samples = 0; + bbr->prev_probe_too_high = 0; + bbr->ecn_eligible = 0; + bbr->ecn_alpha = bbr->params.ecn_alpha_init; + bbr->alpha_last_delivered = 0; + bbr->alpha_last_delivered_ce = 0; + + tp->fast_ack_mode = min_t(u32, 0x2U, bbr_fast_ack_mode); +} + +/* Core TCP stack informs us that the given skb was just marked lost. */ +static void bbr2_skb_marked_lost(struct sock *sk, const struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + struct rate_sample rs; + + /* Capture "current" data over the full round trip of loss, + * to have a better chance to see the full capacity of the path. + */ + if (!bbr->loss_in_round) /* first loss in this round trip? */ + bbr->loss_round_delivered = tp->delivered; /* set round trip */ + bbr->loss_in_round = 1; + bbr->loss_in_cycle = 1; + + if (!bbr->bw_probe_samples) + return; /* not an skb sent while probing for bandwidth */ + if (unlikely(!scb->tx.delivered_mstamp)) + return; /* skb was SACKed, reneged, marked lost; ignore it */ + /* We are probing for bandwidth. Construct a rate sample that + * estimates what happened in the flight leading up to this lost skb, + * then see if the loss rate went too high, and if so at which packet. + */ + memset(&rs, 0, sizeof(rs)); + rs.tx_in_flight = scb->tx.in_flight; + rs.lost = tp->lost - scb->tx.lost; + rs.is_app_limited = scb->tx.is_app_limited; + if (bbr2_is_inflight_too_high(sk, &rs)) { + rs.tx_in_flight = bbr2_inflight_hi_from_lost_skb(sk, &rs, skb); + bbr2_handle_inflight_too_high(sk, &rs); + } +} + +/* Revert short-term model if current loss recovery event was spurious. */ +static u32 bbr2_undo_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->debug.undo = 1; + bbr->full_bw = 0; /* spurious slow-down; reset full pipe detection */ + bbr->full_bw_cnt = 0; + bbr->loss_in_round = 0; + + if (!bbr->params.undo) + return tp->snd_cwnd; + + /* Revert to cwnd and other state saved before loss episode. */ + bbr->bw_lo = max(bbr->bw_lo, bbr->undo_bw_lo); + bbr->inflight_lo = max(bbr->inflight_lo, bbr->undo_inflight_lo); + bbr->inflight_hi = max(bbr->inflight_hi, bbr->undo_inflight_hi); + return bbr->prior_cwnd; +} + +/* Entering loss recovery, so save state for when we undo recovery. */ +static u32 bbr2_ssthresh(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr_save_cwnd(sk); + /* For undo, save state that adapts based on loss signal. */ + bbr->undo_bw_lo = bbr->bw_lo; + bbr->undo_inflight_lo = bbr->inflight_lo; + bbr->undo_inflight_hi = bbr->inflight_hi; + return tcp_sk(sk)->snd_ssthresh; +} + +static enum tcp_bbr2_phase bbr2_get_phase(struct bbr *bbr) +{ + switch (bbr->mode) { + case BBR_STARTUP: + return BBR2_PHASE_STARTUP; + case BBR_DRAIN: + return BBR2_PHASE_DRAIN; + case BBR_PROBE_BW: + break; + case BBR_PROBE_RTT: + return BBR2_PHASE_PROBE_RTT; + default: + return BBR2_PHASE_INVALID; + } + switch (bbr->cycle_idx) { + case BBR_BW_PROBE_UP: + return BBR2_PHASE_PROBE_BW_UP; + case BBR_BW_PROBE_DOWN: + return BBR2_PHASE_PROBE_BW_DOWN; + case BBR_BW_PROBE_CRUISE: + return BBR2_PHASE_PROBE_BW_CRUISE; + case BBR_BW_PROBE_REFILL: + return BBR2_PHASE_PROBE_BW_REFILL; + default: + return BBR2_PHASE_INVALID; + } +} + +static size_t bbr2_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw_bytes_per_sec(sk, bbr_bw(sk)); + u64 bw_hi = bbr_bw_bytes_per_sec(sk, bbr_max_bw(sk)); + u64 bw_lo = bbr->bw_lo == ~0U ? + ~0ULL : bbr_bw_bytes_per_sec(sk, bbr->bw_lo); + + memset(&info->bbr2, 0, sizeof(info->bbr2)); + info->bbr2.bbr_bw_lsb = (u32)bw; + info->bbr2.bbr_bw_msb = (u32)(bw >> 32); + info->bbr2.bbr_min_rtt = bbr->min_rtt_us; + info->bbr2.bbr_pacing_gain = bbr->pacing_gain; + info->bbr2.bbr_cwnd_gain = bbr->cwnd_gain; + info->bbr2.bbr_bw_hi_lsb = (u32)bw_hi; + info->bbr2.bbr_bw_hi_msb = (u32)(bw_hi >> 32); + info->bbr2.bbr_bw_lo_lsb = (u32)bw_lo; + info->bbr2.bbr_bw_lo_msb = (u32)(bw_lo >> 32); + info->bbr2.bbr_mode = bbr->mode; + info->bbr2.bbr_phase = (__u8)bbr2_get_phase(bbr); + info->bbr2.bbr_version = (__u8)2; + info->bbr2.bbr_inflight_lo = bbr->inflight_lo; + info->bbr2.bbr_inflight_hi = bbr->inflight_hi; + info->bbr2.bbr_extra_acked = bbr_extra_acked(sk); + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr2); + } + return 0; +} + +static void bbr2_set_state(struct sock *sk, u8 new_state) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + struct bbr_context ctx = { 0 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + if (!bbr2_is_probing_bandwidth(sk) && bbr->inflight_lo == ~0U) { + /* bbr_adapt_lower_bounds() needs cwnd before + * we suffered an RTO, to update inflight_lo: + */ + bbr->inflight_lo = + max(tp->snd_cwnd, bbr->prior_cwnd); + } + bbr_debug(sk, 0, &rs, &ctx); + } else if (bbr->prev_ca_state == TCP_CA_Loss && + new_state != TCP_CA_Loss) { + tp->snd_cwnd = max(tp->snd_cwnd, bbr->prior_cwnd); + bbr->try_fast_path = 0; /* bound cwnd using latest model */ + } +} + +static struct tcp_congestion_ops tcp_bbr2_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED | TCP_CONG_WANTS_CE_EVENTS, + .name = "bbr2", + .owner = THIS_MODULE, + .init = bbr2_init, + .cong_control = bbr2_main, + .sndbuf_expand = bbr_sndbuf_expand, + .skb_marked_lost = bbr2_skb_marked_lost, + .undo_cwnd = bbr2_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr2_ssthresh, + .tso_segs = bbr_tso_segs, + .get_info = bbr2_get_info, + .set_state = bbr2_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr2_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr2_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_AUTHOR("Priyaranjan Jha "); +MODULE_AUTHOR("Yousuk Seung "); +MODULE_AUTHOR("Kevin Yang "); +MODULE_AUTHOR("Arjun Roy "); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 563d016e7478..1c94abb6f288 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -179,6 +179,7 @@ void tcp_init_congestion_control(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_sk(sk)->prior_ssthresh = 0; + tcp_sk(sk)->fast_ack_mode = 0; if (icsk->icsk_ca_ops->init) icsk->icsk_ca_ops->init(sk); if (tcp_ca_needs_ecn(sk)) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9b44caa4b956..dbefccbf9d0c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -348,7 +348,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tcp_enter_quickack_mode(sk, 2); break; case INET_ECN_CE: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_IS_CE); if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) { @@ -359,7 +359,7 @@ static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb) tp->ecn_flags |= TCP_ECN_SEEN; break; default: - if (tcp_ca_needs_ecn(sk)) + if (tcp_ca_wants_ce_events(sk)) tcp_ca_event(sk, CA_EVENT_ECN_NO_CE); tp->ecn_flags |= TCP_ECN_SEEN; break; @@ -1039,7 +1039,12 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) */ static void tcp_notify_skb_loss_event(struct tcp_sock *tp, const struct sk_buff *skb) { + struct sock *sk = (struct sock *)tp; + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + tp->lost += tcp_skb_pcount(skb); + if (ca_ops->skb_marked_lost) + ca_ops->skb_marked_lost(sk, skb); } void tcp_mark_skb_lost(struct sock *sk, struct sk_buff *skb) @@ -1420,6 +1425,17 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, WARN_ON_ONCE(tcp_skb_pcount(skb) < pcount); tcp_skb_pcount_add(skb, -pcount); + /* Adjust tx.in_flight as pcount is shifted from skb to prev. */ + if (WARN_ONCE(TCP_SKB_CB(skb)->tx.in_flight < pcount, + "prev in_flight: %u skb in_flight: %u pcount: %u", + TCP_SKB_CB(prev)->tx.in_flight, + TCP_SKB_CB(skb)->tx.in_flight, + pcount)) + TCP_SKB_CB(skb)->tx.in_flight = 0; + else + TCP_SKB_CB(skb)->tx.in_flight -= pcount; + TCP_SKB_CB(prev)->tx.in_flight += pcount; + /* When we're adding to gso_segs == 1, gso_size will be zero, * in theory this shouldn't be necessary but as long as DSACK * code can come after this skb later on it's better to keep @@ -3182,7 +3198,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, long seq_rtt_us = -1L; long ca_rtt_us = -1L; u32 pkts_acked = 0; - u32 last_in_flight = 0; bool rtt_update; int flag = 0; @@ -3218,7 +3233,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (!first_ackt) first_ackt = last_ackt; - last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; if (before(start_seq, reord)) reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) @@ -3284,8 +3298,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, seq_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, first_ackt); ca_rtt_us = tcp_stamp_us_delta(tp->tcp_mstamp, last_ackt); - if (pkts_acked == 1 && last_in_flight < tp->mss_cache && - last_in_flight && !prior_sacked && fully_acked && + if (pkts_acked == 1 && fully_acked && !prior_sacked && + (tp->snd_una - prior_snd_una) < tp->mss_cache && sack->rate->prior_delivered + 1 == tp->delivered && !(flag & (FLAG_CA_ALERT | FLAG_SYN_ACKED))) { /* Conservatively mark a delayed ACK. It's typically @@ -3342,9 +3356,10 @@ static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, if (icsk->icsk_ca_ops->pkts_acked) { struct ack_sample sample = { .pkts_acked = pkts_acked, - .rtt_us = sack->rate->rtt_us, - .in_flight = last_in_flight }; + .rtt_us = sack->rate->rtt_us }; + sample.in_flight = tp->mss_cache * + (tp->delivered - sack->rate->prior_delivered); icsk->icsk_ca_ops->pkts_acked(sk, &sample); } @@ -3742,6 +3757,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); + tcp_rate_check_app_limited(sk); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3839,6 +3855,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); + rs.is_ece = !!(flag & FLAG_ECE); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); tcp_cong_control(sk, ack, delivered, flag, sack_state.rate); tcp_xmit_recovery(sk, rexmit); @@ -5406,13 +5423,14 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* More than one full frame received... */ if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss && + (tp->fast_ack_mode == 1 || /* ... and right edge of window advances far enough. * (tcp_recvmsg() will send ACK otherwise). * If application uses SO_RCVLOWAT, we want send ack now if * we have not received enough bytes to satisfy the condition. */ - (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || - __tcp_select_window(sk) >= tp->rcv_wnd)) || + (tp->rcv_nxt - tp->copied_seq < sk->sk_rcvlowat || + __tcp_select_window(sk) >= tp->rcv_wnd))) || /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* Protocol state mandates a one-time immediate ACK */ diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0055ae0a3bf8..7513ba45553d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -804,8 +804,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, tcp_reset(sk, skb); } if (!fastopen) { - inet_csk_reqsk_queue_drop(sk, req); - __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + bool unlinked = inet_csk_reqsk_queue_drop(sk, req); + + if (unlinked) + __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + *req_stolen = !unlinked; } return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8478cf749821..77256e55c3dc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1256,8 +1256,6 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tp->tcp_wstamp_ns = max(tp->tcp_wstamp_ns, tp->tcp_clock_cache); skb->skb_mstamp_ns = tp->tcp_wstamp_ns; if (clone_it) { - TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - - tp->snd_una; oskb = skb; tcp_skb_tsorted_save(oskb) { @@ -1536,7 +1534,7 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; - int nsize, old_factor; + int nsize, old_factor, inflight_prev; long limit; int nlen; u8 flags; @@ -1615,6 +1613,15 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, if (diff) tcp_adjust_pcount(sk, skb, diff); + + /* Set buff tx.in_flight as if buff were sent by itself. */ + inflight_prev = TCP_SKB_CB(skb)->tx.in_flight - old_factor; + if (WARN_ONCE(inflight_prev < 0, + "inconsistent: tx.in_flight: %u old_factor: %d", + TCP_SKB_CB(skb)->tx.in_flight, old_factor)) + inflight_prev = 0; + TCP_SKB_CB(buff)->tx.in_flight = inflight_prev + + tcp_skb_pcount(buff); } /* Link BUFF into the send queue. */ @@ -1982,13 +1989,12 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) { const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; - u32 min_tso, tso_segs; - - min_tso = ca_ops->min_tso_segs ? - ca_ops->min_tso_segs(sk) : - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs; + u32 tso_segs; - tso_segs = tcp_tso_autosize(sk, mss_now, min_tso); + tso_segs = ca_ops->tso_segs ? + ca_ops->tso_segs(sk, mss_now) : + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); return min_t(u32, tso_segs, sk->sk_gso_max_segs); } @@ -2628,6 +2634,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, skb->skb_mstamp_ns = tp->tcp_wstamp_ns = tp->tcp_clock_cache; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); tcp_init_tso_segs(skb, mss_now); + tcp_set_tx_in_flight(sk, skb); goto repair; /* Skip network transmission */ } diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c index 0de693565963..796fa6e5310c 100644 --- a/net/ipv4/tcp_rate.c +++ b/net/ipv4/tcp_rate.c @@ -34,6 +34,24 @@ * ready to send in the write queue. */ +void tcp_set_tx_in_flight(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 in_flight; + + /* Check, sanitize, and record packets in flight after skb was sent. */ + in_flight = tcp_packets_in_flight(tp) + tcp_skb_pcount(skb); + if (WARN_ONCE(in_flight > TCPCB_IN_FLIGHT_MAX, + "insane in_flight %u cc %s mss %u " + "cwnd %u pif %u %u %u %u\n", + in_flight, inet_csk(sk)->icsk_ca_ops->name, + tp->mss_cache, tp->snd_cwnd, + tp->packets_out, tp->retrans_out, + tp->sacked_out, tp->lost_out)) + in_flight = TCPCB_IN_FLIGHT_MAX; + TCP_SKB_CB(skb)->tx.in_flight = in_flight; +} + /* Snapshot the current delivery information in the skb, to generate * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). */ @@ -65,7 +83,10 @@ void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.delivered_ce = tp->delivered_ce; + TCP_SKB_CB(skb)->tx.lost = tp->lost; TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; + tcp_set_tx_in_flight(sk, skb); } /* When an skb is sacked or acked, we fill in the rate sample with the (prior) @@ -86,16 +107,20 @@ void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, if (!rs->prior_delivered || after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_lost = scb->tx.lost; + rs->prior_delivered_ce = scb->tx.delivered_ce; rs->prior_delivered = scb->tx.delivered; rs->prior_mstamp = scb->tx.delivered_mstamp; rs->is_app_limited = scb->tx.is_app_limited; rs->is_retrans = scb->sacked & TCPCB_RETRANS; + rs->tx_in_flight = scb->tx.in_flight; /* Record send time of most recently ACKed packet: */ tp->first_tx_mstamp = tcp_skb_timestamp_us(skb); /* Find the duration of the "send phase" of this window: */ - rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, - scb->tx.first_tx_mstamp); + rs->interval_us = tcp_stamp32_us_delta( + tp->first_tx_mstamp, + scb->tx.first_tx_mstamp); } /* Mark off the skb delivered once it's sacked to avoid being @@ -137,6 +162,11 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, return; } rs->delivered = tp->delivered - rs->prior_delivered; + rs->lost = tp->lost - rs->prior_lost; + + rs->delivered_ce = tp->delivered_ce - rs->prior_delivered_ce; + /* delivered_ce occupies less than 32 bits in the skb control block */ + rs->delivered_ce &= TCPCB_DELIVERED_CE_MASK; /* Model sending data and receiving ACKs as separate pipeline phases * for a window. Usually the ACK phase is longer, but with ACK @@ -144,7 +174,7 @@ void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, * longer phase. */ snd_us = rs->interval_us; /* send phase */ - ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, + ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, rs->prior_mstamp); /* ack phase */ rs->interval_us = max(snd_us, ack_us); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4ef08079ccfa..b5b24caa8ba0 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -607,6 +607,7 @@ void tcp_write_timer_handler(struct sock *sk) goto out; } + tcp_rate_check_app_limited(sk); tcp_mstamp_refresh(tcp_sk(sk)); event = icsk->icsk_pending; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index cfc872689b99..ab770f7ccb30 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -525,7 +525,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, } if (!sk || NAPI_GRO_CB(skb)->encap_mark || - (skb->ip_summed != CHECKSUM_PARTIAL && + (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && !NAPI_GRO_CB(skb)->csum_valid) || !udp_sk(sk)->gro_receive) diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 51184a70ac7e..1578ed9e97d8 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -83,6 +83,9 @@ struct calipso_map_cache_entry { static struct calipso_map_cache_bkt *calipso_cache; +static void calipso_cache_invalidate(void); +static void calipso_doi_putdef(struct calipso_doi *doi_def); + /* Label Mapping Cache Functions */ @@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info) ret_val = -ENOENT; goto doi_remove_return; } - if (!refcount_dec_and_test(&doi_def->refcount)) { - spin_unlock(&calipso_doi_list_lock); - ret_val = -EBUSY; - goto doi_remove_return; - } list_del_rcu(&doi_def->list); spin_unlock(&calipso_doi_list_lock); - call_rcu(&doi_def->rcu, calipso_doi_free_rcu); + calipso_doi_putdef(doi_def); ret_val = 0; doi_remove_return: @@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def) if (!refcount_dec_and_test(&doi_def->refcount)) return; - spin_lock(&calipso_doi_list_lock); - list_del_rcu(&doi_def->list); - spin_unlock(&calipso_doi_list_lock); + calipso_cache_invalidate(); call_rcu(&doi_def->rcu, calipso_doi_free_rcu); } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index f3d05866692e..fd1f896115c1 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -331,10 +331,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st } #if IS_ENABLED(CONFIG_IPV6_MIP6) -static void mip6_addr_swap(struct sk_buff *skb) +static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) { struct ipv6hdr *iph = ipv6_hdr(skb); - struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6_destopt_hao *hao; struct in6_addr tmp; int off; @@ -351,7 +350,7 @@ static void mip6_addr_swap(struct sk_buff *skb) } } #else -static inline void mip6_addr_swap(struct sk_buff *skb) {} +static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {} #endif static struct dst_entry *icmpv6_route_lookup(struct net *net, @@ -446,7 +445,8 @@ static int icmp6_iif(const struct sk_buff *skb) * Send an ICMP message in response to a packet in error */ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct in6_addr *force_saddr) + const struct in6_addr *force_saddr, + const struct inet6_skb_parm *parm) { struct inet6_dev *idev = NULL; struct ipv6hdr *hdr = ipv6_hdr(skb); @@ -542,7 +542,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type)) goto out_bh_enable; - mip6_addr_swap(skb); + mip6_addr_swap(skb, parm); sk = icmpv6_xmit_lock(net); if (!sk) @@ -559,7 +559,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, /* select a more meaningful saddr from input if */ struct net_device *in_netdev; - in_netdev = dev_get_by_index(net, IP6CB(skb)->iif); + in_netdev = dev_get_by_index(net, parm->iif); if (in_netdev) { ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr, inet6_sk(sk)->srcprefs, @@ -640,7 +640,7 @@ EXPORT_SYMBOL(icmp6_send); */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL); + icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb)); kfree_skb(skb); } @@ -697,10 +697,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type, } if (type == ICMP_TIME_EXCEEDED) icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - info, &temp_saddr); + info, &temp_saddr, IP6CB(skb2)); else icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, - info, &temp_saddr); + info, &temp_saddr, IP6CB(skb2)); if (rt) ip6_rt_put(rt); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index f43e27555725..1fb79dbde0cb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -2485,7 +2485,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v) const struct net_device *dev; if (rt->nh) - fib6_nh = nexthop_fib6_nh(rt->nh); + fib6_nh = nexthop_fib6_nh_bh(rt->nh); seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index c3bc89b6b1a1..1baf43aacb2e 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb, tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } *encap_limit = tel->encap_limit - 1; @@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) if (err != 0) { /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); return -1; } @@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) &mtu, skb->protocol); if (err != 0) { if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } @@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* XXX: send ICMP error even if DF is not set. */ if (err == -EMSGSIZE) { if (skb->protocol == htons(ETH_P_IP)) - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); else - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } goto tx_err; diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 70c8c2f36c98..9e3574880cb0 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -33,23 +33,25 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) } EXPORT_SYMBOL(inet6_unregister_icmp_sender); -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) +void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, + const struct inet6_skb_parm *parm) { ip6_icmp_send_t *send; rcu_read_lock(); send = rcu_dereference(ip6_icmp_send); if (send) - send(skb, type, code, info, NULL); + send(skb, type, code, info, NULL, parm); rcu_read_unlock(); } -EXPORT_SYMBOL(icmpv6_send); +EXPORT_SYMBOL(__icmpv6_send); #endif #if IS_ENABLED(CONFIG_NF_NAT) #include void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { + struct inet6_skb_parm parm = { 0 }; struct sk_buff *cloned_skb = NULL; enum ip_conntrack_info ctinfo; struct in6_addr orig_ip; @@ -57,7 +59,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(ct->status & IPS_SRC_NAT)) { - icmpv6_send(skb_in, type, code, info); + __icmpv6_send(skb_in, type, code, info, &parm); return; } @@ -72,7 +74,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) orig_ip = ipv6_hdr(skb_in)->saddr; ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6; - icmpv6_send(skb_in, type, code, info); + __icmpv6_send(skb_in, type, code, info, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index e96304d8a4a7..06d60662717d 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -245,16 +245,6 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev, if (ipv6_addr_is_multicast(&hdr->saddr)) goto err; - /* While RFC4291 is not explicit about v4mapped addresses - * in IPv6 headers, it seems clear linux dual-stack - * model can not deal properly with these. - * Security models could be fooled by ::ffff:127.0.0.1 for example. - * - * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02 - */ - if (ipv6_addr_v4mapped(&hdr->saddr)) - goto err; - skb->transport_header = skb->network_header + sizeof(*hdr); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index a7950baa05e5..3fa0eca5a06f 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, tel = (void *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); + icmpv6_ndo_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; @@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, if (err == -EMSGSIZE) switch (protocol) { case IPPROTO_IPIP: - icmp_send(skb, ICMP_DEST_UNREACH, - ICMP_FRAG_NEEDED, htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); break; case IPPROTO_IPV6: - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); break; default: break; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 0225fd694192..f10e7a72ea62 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); } else { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, - htonl(mtu)); + icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); } err = -EMSGSIZE; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0d453fa9e327..2e2119bfcf13 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = rcu_access_pointer(table->private); + private = READ_ONCE(table->private); /* Address dependency. */ cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = xt_table_get_private_protected(t); + struct xt_table_info *private = t->private; if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = xt_table_get_private_protected(t); + private = t->private; if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = xt_table_get_private_protected(table); + const struct xt_table_info *private = table->private; void __user *pos; unsigned int size; int ret = 0; @@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr, xt_compat_lock(AF_INET6); t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = xt_table_get_private_protected(t); + const struct xt_table_info *private = t->private; struct xt_table_info info; ret = compat_table_info(private, &info); if (!ret && get.size == info.size) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 188e114b29b4..0bbfaa55e3c8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -258,34 +258,16 @@ static struct dst_ops ip6_dst_ops_template = { .confirm_neigh = ip6_confirm_neigh, }; -static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) -{ - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); - - return mtu ? : dst->dev->mtu; -} - -static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu, - bool confirm_neigh) -{ -} - -static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb) -{ -} - static struct dst_ops ip6_dst_blackhole_ops = { - .family = AF_INET6, - .destroy = ip6_dst_destroy, - .check = ip6_dst_check, - .mtu = ip6_blackhole_mtu, - .default_advmss = ip6_default_advmss, - .update_pmtu = ip6_rt_blackhole_update_pmtu, - .redirect = ip6_rt_blackhole_redirect, - .cow_metrics = dst_cow_metrics_generic, - .neigh_lookup = ip6_dst_neigh_lookup, + .family = AF_INET6, + .default_advmss = ip6_default_advmss, + .neigh_lookup = ip6_dst_neigh_lookup, + .check = ip6_dst_check, + .destroy = ip6_dst_destroy, + .cow_metrics = dst_cow_metrics_generic, + .update_pmtu = dst_blackhole_update_pmtu, + .redirect = dst_blackhole_redirect, + .mtu = dst_blackhole_mtu, }; static const u32 ip6_template_metrics[RTAX_MAX] = { diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 93636867aee2..63ccd9f2dccc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0e1509b02cb3..c07e5e8d557b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1175,6 +1175,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; + if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { + __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); + return 0; + } + return tcp_conn_request(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, skb); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 882f028992c3..427a1abce0a8 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -2036,7 +2036,6 @@ static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev, char nullstring[8]; if (!pskb_may_pull(skb, sizeof(*trans_hdr))) { - WARN_ONCE(1, "AF_IUCV failed to receive skb, len=%u", skb->len); kfree_skb(skb); return NET_RX_SUCCESS; } diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 7be5103ff2a8..203890e378cb 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, /* Parse and check optional cookie */ if (session->peer_cookie_len > 0) { if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) { - pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", - tunnel->name, tunnel->tunnel_id, - session->session_id); + pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n", + tunnel->name, tunnel->tunnel_id, + session->session_id); atomic_long_inc(&session->stats.rx_cookie_discards); goto discard; } @@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, * If user has configured mandatory sequence numbers, discard. */ if (session->recv_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, session->send_seq = 0; l2tp_session_set_header_len(session, tunnel->version); } else if (session->send_seq) { - pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", - session->name); + pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n", + session->name); atomic_long_inc(&session->stats.rx_seq_discards); goto discard; } @@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Short packet? */ if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) { - pr_warn_ratelimited("%s: recv short packet (len=%d)\n", - tunnel->name, skb->len); - goto error; + pr_debug_ratelimited("%s: recv short packet (len=%d)\n", + tunnel->name, skb->len); + goto invalid; } /* Point to L2TP header */ @@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* Check protocol version */ version = hdrflags & L2TP_HDR_VER_MASK; if (version != tunnel->version) { - pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", - tunnel->name, version, tunnel->version); - goto error; + pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n", + tunnel->name, version, tunnel->version); + goto invalid; } /* Get length of L2TP packet */ @@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) /* If type is control packet, it is handled by userspace. */ if (hdrflags & L2TP_HDRFLAG_T) - goto error; + goto pass; /* Skip flags */ ptr += 2; @@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) l2tp_session_dec_refcount(session); /* Not found? Pass to userspace to deal with */ - pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n", - tunnel->name, tunnel_id, session_id); - goto error; + pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n", + tunnel->name, tunnel_id, session_id); + goto pass; } if (tunnel->version == L2TP_HDR_VER_3 && l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) - goto error; + goto invalid; l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); return 0; -error: +invalid: + atomic_long_inc(&tunnel->stats.rx_invalid); + +pass: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index cb21d906343e..98ea98eb9567 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -39,6 +39,7 @@ struct l2tp_stats { atomic_long_t rx_oos_packets; atomic_long_t rx_errors; atomic_long_t rx_cookie_discards; + atomic_long_t rx_invalid; }; struct l2tp_tunnel; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 83956c9ee1fc..96eb91be9238 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&tunnel->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&tunnel->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); @@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl L2TP_ATTR_STATS_PAD) || nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS, atomic_long_read(&session->stats.rx_errors), + L2TP_ATTR_STATS_PAD) || + nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID, + atomic_long_read(&session->stats.rx_invalid), L2TP_ATTR_STATS_PAD)) goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c4c70e30ad7f..68a0de02b561 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2950,14 +2950,14 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, continue; for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) { - if (~sdata->rc_rateidx_mcs_mask[i][j]) { + if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) { sdata->rc_has_mcs_mask[i] = true; break; } } for (j = 0; j < NL80211_VHT_NSS_MAX; j++) { - if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) { + if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) { sdata->rc_has_vht_mcs_mask[i] = true; break; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1f552f374e97..a7ac53a2f00d 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1874,6 +1874,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) /* remove beacon */ kfree(sdata->u.ibss.ie); + sdata->u.ibss.ie = NULL; + sdata->u.ibss.ie_len = 0; /* on the next join, re-program HT parameters */ memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa)); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 313eee12410e..3db514c4c63a 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -356,7 +356,7 @@ u32 airtime_link_metric_get(struct ieee80211_local *local, */ tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); - result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT); + result = ((u64)tx_time * estimated_retx) >> (2 * ARITH_SHIFT); return (u32)result; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0e4d950cf907..9db648a91a4f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -5071,7 +5071,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION, ies->data, ies->len); if (he_oper_ie && - he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3])) + he_oper_ie[1] >= ieee80211_he_oper_size(&he_oper_ie[3])) he_oper = (void *)(he_oper_ie + 3); else he_oper = NULL; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 8d3ae6b2f95f..f4507a708965 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc, break; case WLAN_EID_EXT_HE_OPERATION: if (len >= sizeof(*elems->he_operation) && - len == ieee80211_he_oper_size(data) - 1) { + len >= ieee80211_he_oper_size(data) - 1) { if (crc) *crc = crc32_be(*crc, (void *)elem, elem->datalen + 2); diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index b1690149b6fa..1482259de9b5 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -14,6 +14,7 @@ #include #include #include +#include static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t features) @@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, skb_reset_network_header(skb); mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); + if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN)) + goto out; if (unlikely(!pskb_may_pull(skb, mpls_hlen))) goto out; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e0d21c0607e5..37ef0bf098f6 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -401,6 +401,7 @@ static void clear_3rdack_retransmission(struct sock *sk) } static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, + bool snd_data_fin_enable, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -418,9 +419,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, if (!skb) return false; - /* MPC/MPJ needed only on 3rd ack packet */ - if (subflow->fully_established || - subflow->snd_isn != TCP_SKB_CB(skb)->seq) + /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ + if (subflow->fully_established || snd_data_fin_enable || + subflow->snd_isn != TCP_SKB_CB(skb)->seq || + sk->sk_state != TCP_ESTABLISHED) return false; if (subflow->mp_capable) { @@ -492,6 +494,7 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, } static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, + bool snd_data_fin_enable, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) @@ -499,13 +502,11 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int dss_size = 0; - u64 snd_data_fin_enable; struct mptcp_ext *mpext; unsigned int ack_size; bool ret = false; mpext = skb ? mptcp_get_ext(skb) : NULL; - snd_data_fin_enable = mptcp_data_fin_enabled(msk); if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) { unsigned int map_size; @@ -554,15 +555,15 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in_addr *addr) + struct in_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; memcpy(&msg[1], &addr->s_addr, 4); - msg[5] = 0; - msg[6] = 0; + msg[5] = port >> 8; + msg[6] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); @@ -571,15 +572,15 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, - struct in6_addr *addr) + struct in6_addr *addr, u16 port) { u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; memcpy(&msg[1], &addr->s6_addr, 16); - msg[17] = 0; - msg[18] = 0; + msg[17] = port >> 8; + msg[18] = port & 0xFF; mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); @@ -633,7 +634,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = add_addr_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr); + &opts->addr, + opts->port); } } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -644,7 +646,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = add_addr6_generate_hmac(msk->local_key, msk->remote_key, opts->addr_id, - &opts->addr6); + &opts->addr6, + opts->port); } } #endif @@ -683,12 +686,15 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, unsigned int *size, unsigned int remaining, struct mptcp_out_options *opts) { + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); unsigned int opt_size = 0; + bool snd_data_fin; bool ret = false; opts->suboptions = 0; - if (unlikely(mptcp_check_fallback(sk))) + if (unlikely(__mptcp_check_fallback(msk))) return false; /* prevent adding of any MPTCP related options on reset packet @@ -697,10 +703,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) return false; - if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) + snd_data_fin = mptcp_data_fin_enabled(msk); + if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; - else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, - opts)) + else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) ret = true; /* we reserved enough space for the above options, and exceeding the @@ -918,12 +924,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) hmac = add_addr_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr); + mp_opt->addr_id, &mp_opt->addr, + mp_opt->port); #if IS_ENABLED(CONFIG_MPTCP_IPV6) else hmac = add_addr6_generate_hmac(msk->remote_key, msk->local_key, - mp_opt->addr_id, &mp_opt->addr6); + mp_opt->addr_id, &mp_opt->addr6, + mp_opt->port); #endif pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index da2ed576f289..1c01c3bcbf5a 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -20,6 +20,8 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, pr_debug("msk=%p, local_id=%d", msk, addr->id); + lockdep_assert_held(&msk->pm.lock); + if (add_addr) { pr_warn("addr_signal error, add_addr=%d", add_addr); return -EINVAL; @@ -188,8 +190,7 @@ void mptcp_pm_add_addr_received(struct mptcp_sock *msk, void mptcp_pm_add_addr_send_ack(struct mptcp_sock *msk) { - if (!mptcp_pm_should_add_signal_ipv6(msk) && - !mptcp_pm_should_add_signal_port(msk)) + if (!mptcp_pm_should_add_signal(msk)) return; mptcp_pm_schedule_work(msk, MPTCP_PM_ADD_ADDR_SEND_ACK); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a6d983d80576..71c41b948861 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -134,6 +134,8 @@ select_local_address(const struct pm_nl_pernet *pernet, { struct mptcp_pm_addr_entry *entry, *ret = NULL; + msk_owned_by_me(msk); + rcu_read_lock(); __mptcp_flush_join_list(msk); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { @@ -191,6 +193,8 @@ lookup_anno_list_by_saddr(struct mptcp_sock *msk, { struct mptcp_pm_add_entry *entry; + lockdep_assert_held(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.anno_list, list) { if (addresses_equal(&entry->addr, addr, false)) return entry; @@ -266,6 +270,8 @@ static bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + lockdep_assert_held(&msk->pm.lock); + if (lookup_anno_list_by_saddr(msk, &entry->addr)) return false; @@ -408,8 +414,10 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - if (!mptcp_pm_should_add_signal_ipv6(msk) && - !mptcp_pm_should_add_signal_port(msk)) + msk_owned_by_me(msk); + lockdep_assert_held(&msk->pm.lock); + + if (!mptcp_pm_should_add_signal(msk)) return; __mptcp_flush_join_list(msk); @@ -419,10 +427,9 @@ void mptcp_pm_nl_add_addr_send_ack(struct mptcp_sock *msk) u8 add_addr; spin_unlock_bh(&msk->pm.lock); - if (mptcp_pm_should_add_signal_ipv6(msk)) - pr_debug("send ack for add_addr6"); - if (mptcp_pm_should_add_signal_port(msk)) - pr_debug("send ack for add_addr_port"); + pr_debug("send ack for add_addr%s%s", + mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "", + mptcp_pm_should_add_signal_port(msk) ? " [port]" : ""); lock_sock(ssk); tcp_send_ack(ssk); @@ -445,6 +452,8 @@ void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) pr_debug("address rm_id %d", msk->pm.rm_id); + msk_owned_by_me(msk); + if (!msk->pm.rm_id) return; @@ -480,6 +489,8 @@ void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, u8 rm_id) pr_debug("subflow rm_id %d", rm_id); + msk_owned_by_me(msk); + if (!rm_id) return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f998a077c7dd..7345df40385a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -114,11 +114,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk) list_add(&subflow->node, &msk->conn_list); sock_hold(ssock->sk); subflow->request_mptcp = 1; - - /* accept() will wait on first subflow sk_wq, and we always wakes up - * via msk->sk_socket - */ - RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + mptcp_sock_graft(msk->first, sk->sk_socket); return 0; } @@ -364,8 +360,6 @@ static void mptcp_check_data_fin_ack(struct sock *sk) /* Look for an acknowledged DATA_FIN */ if (mptcp_pending_data_fin_ack(sk)) { - mptcp_stop_timer(sk); - WRITE_ONCE(msk->snd_data_fin_enable, 0); switch (sk->sk_state) { @@ -1182,6 +1176,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size, */ while (skbs->qlen > 1) { skb = __skb_dequeue_tail(skbs); + *total_ts -= skb->truesize; __kfree_skb(skb); } return skbs->qlen > 0; @@ -2105,6 +2100,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) return backup; } +static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk) +{ + if (msk->subflow) { + iput(SOCK_INODE(msk->subflow)); + msk->subflow = NULL; + } +} + /* subflow sockets can be either outgoing (connect) or incoming * (accept). * @@ -2116,8 +2119,7 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk) void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow) { - bool dispose_socket = false; - struct socket *sock; + struct mptcp_sock *msk = mptcp_sk(sk); list_del(&subflow->node); @@ -2126,11 +2128,8 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, /* if we are invoked by the msk cleanup code, the subflow is * already orphaned */ - sock = ssk->sk_socket; - if (sock) { - dispose_socket = sock != sk->sk_socket; + if (ssk->sk_socket) sock_orphan(ssk); - } subflow->disposable = 1; @@ -2148,10 +2147,14 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, __sock_put(ssk); } release_sock(ssk); - if (dispose_socket) - iput(SOCK_INODE(sock)); sock_put(ssk); + + if (ssk == msk->last_snd) + msk->last_snd = NULL; + + if (msk->subflow && ssk == msk->subflow->sk) + mptcp_dispose_initial_subflow(msk); } static unsigned int mptcp_sync_mss(struct sock *sk, u32 pmtu) @@ -2194,6 +2197,8 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow, *tmp; + might_sleep(); + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); @@ -2299,6 +2304,7 @@ static void mptcp_worker(struct work_struct *work) if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags)) goto unlock; + __mptcp_clean_una(sk); dfrag = mptcp_rtx_head(sk); if (!dfrag) goto unlock; @@ -2536,6 +2542,8 @@ static void __mptcp_destroy_sock(struct sock *sk) pr_debug("msk=%p", msk); + might_sleep(); + /* be sure to always acquire the join list lock, to sync vs * mptcp_finish_join(). */ @@ -2560,6 +2568,7 @@ static void __mptcp_destroy_sock(struct sock *sk) sk_stream_kill_queues(sk); xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + mptcp_dispose_initial_subflow(msk); sock_put(sk); } @@ -2586,20 +2595,10 @@ static void mptcp_close(struct sock *sk, long timeout) inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32; list_for_each_entry(subflow, &mptcp_sk(sk)->conn_list, node) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow, dispose_socket; - struct socket *sock; + bool slow = lock_sock_fast(ssk); - slow = lock_sock_fast(ssk); - sock = ssk->sk_socket; - dispose_socket = sock && sock != sk->sk_socket; sock_orphan(ssk); unlock_sock_fast(ssk, slow); - - /* for the outgoing subflows we additionally need to free - * the associated socket - */ - if (dispose_socket) - iput(SOCK_INODE(sock)); } sock_orphan(sk); @@ -3041,7 +3040,7 @@ void mptcp_finish_connect(struct sock *ssk) mptcp_rcv_space_init(msk, ssk); } -static void mptcp_sock_graft(struct sock *sk, struct socket *parent) +void mptcp_sock_graft(struct sock *sk, struct socket *parent) { write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d67de793d363..c374345ad134 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -50,14 +50,15 @@ #define TCPOLEN_MPTCP_DSS_MAP64 14 #define TCPOLEN_MPTCP_DSS_CHECKSUM 2 #define TCPOLEN_MPTCP_ADD_ADDR 16 -#define TCPOLEN_MPTCP_ADD_ADDR_PORT 20 +#define TCPOLEN_MPTCP_ADD_ADDR_PORT 18 #define TCPOLEN_MPTCP_ADD_ADDR_BASE 8 -#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 12 +#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT 10 #define TCPOLEN_MPTCP_ADD_ADDR6 28 -#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 32 +#define TCPOLEN_MPTCP_ADD_ADDR6_PORT 30 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE 20 -#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 24 -#define TCPOLEN_MPTCP_PORT_LEN 4 +#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT 22 +#define TCPOLEN_MPTCP_PORT_LEN 2 +#define TCPOLEN_MPTCP_PORT_ALIGN 2 #define TCPOLEN_MPTCP_RM_ADDR_BASE 4 #define TCPOLEN_MPTCP_FASTCLOSE 12 @@ -285,6 +286,11 @@ struct mptcp_sock { #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) +static inline void msk_owned_by_me(const struct mptcp_sock *msk) +{ + sock_owned_by_me((const struct sock *)msk); +} + static inline struct mptcp_sock *mptcp_sk(const struct sock *sk) { return (struct mptcp_sock *)sk; @@ -325,20 +331,13 @@ static inline struct mptcp_data_frag *mptcp_pending_tail(const struct sock *sk) return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); } -static inline struct mptcp_data_frag *mptcp_rtx_tail(const struct sock *sk) +static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - if (!before64(msk->snd_nxt, READ_ONCE(msk->snd_una))) + if (msk->snd_una == READ_ONCE(msk->snd_nxt)) return NULL; - return list_last_entry(&msk->rtx_queue, struct mptcp_data_frag, list); -} - -static inline struct mptcp_data_frag *mptcp_rtx_head(const struct sock *sk) -{ - struct mptcp_sock *msk = mptcp_sk(sk); - return list_first_entry_or_null(&msk->rtx_queue, struct mptcp_data_frag, list); } @@ -466,6 +465,7 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); void mptcp_subflow_reset(struct sock *ssk); +void mptcp_sock_graft(struct sock *sk, struct socket *parent); /* called with sk socket lock held */ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, @@ -594,8 +594,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port) len = TCPOLEN_MPTCP_ADD_ADDR6_BASE; if (!echo) len += MPTCPOPT_THMAC_LEN; + /* account for 2 trailing 'nop' options */ if (port) - len += TCPOLEN_MPTCP_PORT_LEN; + len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN; return len; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 278cbe3e539e..96e040951cd4 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -440,6 +440,11 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!ipv6_unicast_destination(skb)) goto drop; + if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { + __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); + return 0; + } + return tcp_conn_request(&mptcp_subflow_request_sock_ops, &subflow_request_sock_ipv6_ops, sk, skb); @@ -1026,6 +1031,12 @@ static void subflow_data_ready(struct sock *sk) msk = mptcp_sk(parent); if (state & TCPF_LISTEN) { + /* MPJ subflow are removed from accept queue before reaching here, + * avoid stray wakeups + */ + if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue)) + return; + set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; @@ -1159,12 +1170,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc, if (err && err != -EINPROGRESS) goto failed_unlink; + /* discard the subflow socket */ + mptcp_sock_graft(ssk, sk->sk_socket); + iput(SOCK_INODE(sf)); return err; failed_unlink: spin_lock_bh(&msk->join_list_lock); list_del(&subflow->node); spin_unlock_bh(&msk->join_list_lock); + sock_put(mptcp_subflow_tcp_sock(subflow)); failed: subflow->disposable = 1; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 84caf3316946..e0c566b3df90 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2969,6 +2969,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); m.src.u.all = mask->src.u.all; + m.src.l3num = tuple->src.l3num; m.dst.protonum = tuple->dst.protonum; nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 4a4acbba78ff..b03feb6e1226 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -506,7 +506,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable) { int err; - INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); + INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc); flow_block_init(&flowtable->flow_block); init_rwsem(&flowtable->flow_block_lock); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index e87b6bd6b3cd..4731d21fc3ad 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, } static unsigned int -nf_nat_ipv4_in(void *priv, struct sk_buff *skb, - const struct nf_hook_state *state) +nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) { unsigned int ret; __be32 daddr = ip_hdr(skb)->daddr; @@ -659,6 +659,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb, return ret; } +static unsigned int +nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + __be32 saddr = ip_hdr(skb)->saddr; + struct sock *sk = skb->sk; + unsigned int ret; + + ret = nf_nat_ipv4_fn(priv, skb, state); + + if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr && + !inet_sk_transparent(sk)) + skb_orphan(skb); /* TCP edemux obtained wrong socket */ + + return ret; +} + static unsigned int nf_nat_ipv4_out(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { - .hook = nf_nat_ipv4_in, + .hook = nf_nat_ipv4_pre_routing, .pf = NFPROTO_IPV4, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP_PRI_NAT_DST, @@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = { }, /* After packet filtering, change source */ { - .hook = nf_nat_ipv4_fn, + .hook = nf_nat_ipv4_local_in, .pf = NFPROTO_IPV4, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP_PRI_NAT_SRC, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8ee9f40cc0ea..24a7a6b17268 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6808,6 +6808,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, struct nft_hook *hook, *next; struct nft_trans *trans; bool unregister = false; + u32 flags; int err; err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK], @@ -6822,6 +6823,17 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, } } + if (nla[NFTA_FLOWTABLE_FLAGS]) { + flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); + if (flags & ~NFT_FLOWTABLE_MASK) + return -EOPNOTSUPP; + if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^ + (flags & NFT_FLOWTABLE_HW_OFFLOAD)) + return -EOPNOTSUPP; + } else { + flags = flowtable->data.flags; + } + err = nft_register_flowtable_net_hooks(ctx->net, ctx->table, &flowtable_hook.list, flowtable); if (err < 0) @@ -6835,6 +6847,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh, goto err_flowtable_update_hook; } + nft_trans_flowtable_flags(trans) = flags; nft_trans_flowtable(trans) = flowtable; nft_trans_flowtable_update(trans) = true; INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans)); @@ -6929,8 +6942,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk, if (nla[NFTA_FLOWTABLE_FLAGS]) { flowtable->data.flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS])); - if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) + if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) { + err = -EOPNOTSUPP; goto err3; + } } write_pnet(&flowtable->data.net, net); @@ -8142,6 +8157,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { + nft_trans_flowtable(trans)->data.flags = + nft_trans_flowtable_flags(trans); nf_tables_flowtable_notify(&trans->ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index acce622582e3..6bd31a7a27fc 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_match *m; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { if (m->revision > *bestp) @@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return match_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) const struct xt_target *t; int have_rev = 0; + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { if (t->revision > *bestp) @@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) have_rev = 1; } } + mutex_unlock(&xt[af].mutex); if (af != NFPROTO_UNSPEC && !have_rev) return target_revfn(NFPROTO_UNSPEC, name, revision, bestp); @@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, { int have_rev, best = -1; - mutex_lock(&xt[af].mutex); if (target == 1) have_rev = target_revfn(af, name, revision, &best); else have_rev = match_revfn(af, name, revision, &best); - mutex_unlock(&xt[af].mutex); /* Nothing at all? Return 0 to try loading module. */ if (best == -1) { @@ -1349,14 +1351,6 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); -struct xt_table_info -*xt_table_get_private_protected(const struct xt_table *table) -{ - return rcu_dereference_protected(table->private, - mutex_is_locked(&xt[table->af].mutex)); -} -EXPORT_SYMBOL(xt_table_get_private_protected); - struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1364,6 +1358,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1373,20 +1368,47 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - private = xt_table_get_private_protected(table); + local_bh_disable(); + private = table->private; /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); + local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; + /* + * Ensure contents of newinfo are visible before assigning to + * private. + */ + smp_wmb(); + table->private = newinfo; + + /* make sure all cpus see new ->private value */ + smp_mb(); + + /* + * Even though table entries have now been swapped, other CPU's + * may still be using the old entries... + */ + local_bh_enable(); - rcu_assign_pointer(table->private, newinfo); - synchronize_rcu(); + /* ... so wait for even xt_recseq on all cpus */ + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + u32 seq = raw_read_seqcount(s); + + if (seq & 1) { + do { + cond_resched(); + cpu_relax(); + } while (seq == raw_read_seqcount(s)); + } + } audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1422,12 +1444,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. */ - rcu_assign_pointer(table->private, bootstrap); + table->private = bootstrap; if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = xt_table_get_private_protected(table); + private = table->private; pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1450,8 +1472,7 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = xt_table_get_private_protected(table); - RCU_INIT_POINTER(table->private, NULL); + private = table->private; list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 726dda95934c..4f50a64315cf 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -575,6 +575,7 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) break; } + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); genlmsg_end(ans_skb, data); @@ -583,12 +584,14 @@ static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) list_retry: /* XXX - this limit is a guesstimate */ if (nlsze_mult < 4) { + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); kfree_skb(ans_skb); nlsze_mult *= 2; goto list_start; } list_failure_lock: + cipso_v4_doi_putdef(doi_def); rcu_read_unlock(); list_failure: kfree_skb(ans_skb); diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c index 11b554ce07ff..1204c438e87d 100644 --- a/net/nfc/nci/uart.c +++ b/net/nfc/nci/uart.c @@ -292,7 +292,8 @@ static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, /* We don't provide read/write/poll interface for user space. */ static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file, - unsigned char __user *buf, size_t nr) + unsigned char *buf, size_t nr, + void **cookie, unsigned long offset) { return 0; } diff --git a/net/psample/psample.c b/net/psample/psample.c index 33e238c965bd..482c07f2766b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -309,10 +309,10 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info) unsigned short tun_proto = ip_tunnel_info_af(tun_info); const struct ip_tunnel_key *tun_key = &tun_info->key; int tun_opts_len = tun_info->options_len; - int sum = 0; + int sum = nla_total_size(0); /* PSAMPLE_ATTR_TUNNEL */ if (tun_key->tun_flags & TUNNEL_KEY) - sum += nla_total_size(sizeof(u64)); + sum += nla_total_size_64bit(sizeof(u64)); if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE) sum += nla_total_size(0); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index b34358282f37..dfc820ee553a 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) if (len == 0 || len & 3) return -EINVAL; - skb = netdev_alloc_skb(NULL, len); + skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN); if (!skb) return -ENOMEM; @@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) plen = (len + 3) & ~3; skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); - if (!skb) + if (!skb) { + rc = -ENOMEM; goto out_node; + } skb_reserve(skb, QRTR_HDR_MAX_SIZE); @@ -1056,6 +1058,11 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, rc = copied; if (addr) { + /* There is an anonymous 2-byte hole after sq_family, + * make sure to clear it. + */ + memset(addr, 0, sizeof(*addr)); + addr->sq_family = AF_QIPCRTR; addr->sq_node = cb->src_node; addr->sq_port = cb->src_port; diff --git a/net/qrtr/tun.c b/net/qrtr/tun.c index b238c40a9984..304b41fea5ab 100644 --- a/net/qrtr/tun.c +++ b/net/qrtr/tun.c @@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb) static int qrtr_tun_open(struct inode *inode, struct file *filp) { struct qrtr_tun *tun; + int ret; tun = kzalloc(sizeof(*tun), GFP_KERNEL); if (!tun) @@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp) filp->private_data = tun; - return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); + ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO); + if (ret) + goto out; + + return 0; + +out: + filp->private_data = NULL; + kfree(tun); + return ret; } static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 2e85b636b27b..b919826939e0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -908,7 +908,7 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_HW_STATS] = NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY), }; -static void tcf_idr_insert_many(struct tc_action *actions[]) +void tcf_idr_insert_many(struct tc_action *actions[]) { int i; @@ -928,19 +928,13 @@ static void tcf_idr_insert_many(struct tc_action *actions[]) } } -struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, - struct nlattr *nla, struct nlattr *est, - char *name, int ovr, int bind, - bool rtnl_held, - struct netlink_ext_ack *extack) +struct tc_action_ops *tc_action_load_ops(char *name, struct nlattr *nla, + bool rtnl_held, + struct netlink_ext_ack *extack) { - struct nla_bitfield32 flags = { 0, 0 }; - u8 hw_stats = TCA_ACT_HW_STATS_ANY; - struct tc_action *a; + struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action_ops *a_o; - struct tc_cookie *cookie = NULL; char act_name[IFNAMSIZ]; - struct nlattr *tb[TCA_ACT_MAX + 1]; struct nlattr *kind; int err; @@ -948,33 +942,21 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, tcf_action_policy, extack); if (err < 0) - goto err_out; + return ERR_PTR(err); err = -EINVAL; kind = tb[TCA_ACT_KIND]; if (!kind) { NL_SET_ERR_MSG(extack, "TC action kind must be specified"); - goto err_out; + return ERR_PTR(err); } if (nla_strscpy(act_name, kind, IFNAMSIZ) < 0) { NL_SET_ERR_MSG(extack, "TC action name too long"); - goto err_out; - } - if (tb[TCA_ACT_COOKIE]) { - cookie = nla_memdup_cookie(tb); - if (!cookie) { - NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); - err = -ENOMEM; - goto err_out; - } + return ERR_PTR(err); } - hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); - if (tb[TCA_ACT_FLAGS]) - flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); } else { if (strlcpy(act_name, name, IFNAMSIZ) >= IFNAMSIZ) { NL_SET_ERR_MSG(extack, "TC action name too long"); - err = -EINVAL; - goto err_out; + return ERR_PTR(-EINVAL); } } @@ -996,24 +978,56 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, * indicate this using -EAGAIN. */ if (a_o != NULL) { - err = -EAGAIN; - goto err_mod; + module_put(a_o->owner); + return ERR_PTR(-EAGAIN); } #endif NL_SET_ERR_MSG(extack, "Failed to load TC action module"); - err = -ENOENT; - goto err_free; + return ERR_PTR(-ENOENT); } + return a_o; +} + +struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, + struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, + struct tc_action_ops *a_o, bool rtnl_held, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 flags = { 0, 0 }; + u8 hw_stats = TCA_ACT_HW_STATS_ANY; + struct nlattr *tb[TCA_ACT_MAX + 1]; + struct tc_cookie *cookie = NULL; + struct tc_action *a; + int err; + /* backward compatibility for policer */ - if (name == NULL) + if (name == NULL) { + err = nla_parse_nested_deprecated(tb, TCA_ACT_MAX, nla, + tcf_action_policy, extack); + if (err < 0) + return ERR_PTR(err); + if (tb[TCA_ACT_COOKIE]) { + cookie = nla_memdup_cookie(tb); + if (!cookie) { + NL_SET_ERR_MSG(extack, "No memory to generate TC cookie"); + err = -ENOMEM; + goto err_out; + } + } + hw_stats = tcf_action_hw_stats_get(tb[TCA_ACT_HW_STATS]); + if (tb[TCA_ACT_FLAGS]) + flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); + err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, rtnl_held, tp, flags.value, extack); - else + } else { err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, tp, flags.value, extack); + } if (err < 0) - goto err_mod; + goto err_out; if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); @@ -1030,14 +1044,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, return a; -err_mod: - module_put(a_o->owner); -err_free: +err_out: if (cookie) { kfree(cookie->data); kfree(cookie); } -err_out: return ERR_PTR(err); } @@ -1048,6 +1059,7 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct tc_action *actions[], size_t *attr_size, bool rtnl_held, struct netlink_ext_ack *extack) { + struct tc_action_ops *ops[TCA_ACT_MAX_PRIO] = {}; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; size_t sz = 0; @@ -1059,9 +1071,20 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, if (err < 0) return err; + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { + struct tc_action_ops *a_o; + + a_o = tc_action_load_ops(name, tb[i], rtnl_held, extack); + if (IS_ERR(a_o)) { + err = PTR_ERR(a_o); + goto err_mod; + } + ops[i - 1] = a_o; + } + for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { act = tcf_action_init_1(net, tp, tb[i], est, name, ovr, bind, - rtnl_held, extack); + ops[i - 1], rtnl_held, extack); if (IS_ERR(act)) { err = PTR_ERR(act); goto err; @@ -1081,6 +1104,11 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla, err: tcf_action_destroy(actions, bind); +err_mod: + for (i = 0; i < TCA_ACT_MAX_PRIO; i++) { + if (ops[i]) + module_put(ops[i]->owner); + } return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 37b77bd30974..e37556cc37ab 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3043,16 +3043,24 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, size_t attr_size = 0; if (exts->police && tb[exts->police]) { + struct tc_action_ops *a_o; + + a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack); + if (IS_ERR(a_o)) + return PTR_ERR(a_o); act = tcf_action_init_1(net, tp, tb[exts->police], rate_tlv, "police", ovr, - TCA_ACT_BIND, rtnl_held, + TCA_ACT_BIND, a_o, rtnl_held, extack); - if (IS_ERR(act)) + if (IS_ERR(act)) { + module_put(a_o->owner); return PTR_ERR(act); + } act->type = exts->type = TCA_OLD_COMPAT; exts->actions[0] = act; exts->nr_actions = 1; + tcf_idr_insert_many(exts->actions); } else if (exts->action && tb[exts->action]) { int err; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 84f932532db7..14316ba9b3b3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -30,6 +30,11 @@ #include +#define TCA_FLOWER_KEY_CT_FLAGS_MAX \ + ((__TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) << 1) +#define TCA_FLOWER_KEY_CT_FLAGS_MASK \ + (TCA_FLOWER_KEY_CT_FLAGS_MAX - 1) + struct fl_flow_key { struct flow_dissector_key_meta meta; struct flow_dissector_key_control control; @@ -686,8 +691,10 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED }, [TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED }, - [TCA_FLOWER_KEY_CT_STATE] = { .type = NLA_U16 }, - [TCA_FLOWER_KEY_CT_STATE_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_CT_STATE] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), + [TCA_FLOWER_KEY_CT_STATE_MASK] = + NLA_POLICY_MASK(NLA_U16, TCA_FLOWER_KEY_CT_FLAGS_MASK), [TCA_FLOWER_KEY_CT_ZONE] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_ZONE_MASK] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_CT_MARK] = { .type = NLA_U32 }, @@ -1390,12 +1397,33 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key, return 0; } +static int fl_validate_ct_state(u16 state, struct nlattr *tb, + struct netlink_ext_ack *extack) +{ + if (state && !(state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED)) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "no trk, so no other flag can be set"); + return -EINVAL; + } + + if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW && + state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED) { + NL_SET_ERR_MSG_ATTR(extack, tb, + "new and est are mutually exclusive"); + return -EINVAL; + } + + return 0; +} + static int fl_set_key_ct(struct nlattr **tb, struct flow_dissector_key_ct *key, struct flow_dissector_key_ct *mask, struct netlink_ext_ack *extack) { if (tb[TCA_FLOWER_KEY_CT_STATE]) { + int err; + if (!IS_ENABLED(CONFIG_NF_CONNTRACK)) { NL_SET_ERR_MSG(extack, "Conntrack isn't enabled"); return -EOPNOTSUPP; @@ -1403,6 +1431,13 @@ static int fl_set_key_ct(struct nlattr **tb, fl_set_key_val(tb, &key->ct_state, TCA_FLOWER_KEY_CT_STATE, &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK, sizeof(key->ct_state)); + + err = fl_validate_ct_state(key->ct_state & mask->ct_state, + tb[TCA_FLOWER_KEY_CT_STATE_MASK], + extack); + if (err) + return err; + } if (tb[TCA_FLOWER_KEY_CT_ZONE]) { if (!IS_ENABLED(CONFIG_NF_CONNTRACK_ZONES)) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 6fe4e5cc807c..5f90ee76fd41 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -2167,7 +2167,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, struct tcmsg *tcm, struct netlink_callback *cb, - int *t_p, int s_t) + int *t_p, int s_t, bool recur) { struct Qdisc *q; int b; @@ -2178,7 +2178,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - if (!qdisc_dev(root)) + if (!qdisc_dev(root) || !recur) return 0; if (tcm->tcm_parent) { @@ -2213,13 +2213,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, - &t, s_t) < 0) + &t, s_t, false) < 0) goto done; done: diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index 50f680f03a54..2adbd945bf15 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -345,6 +345,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, struct sk_buff **old = NULL; unsigned int mask; u32 max_P; + u8 *stab; if (opt == NULL) return -EINVAL; @@ -361,8 +362,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0; ctl = nla_data(tb[TCA_CHOKE_PARMS]); - - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) + stab = nla_data(tb[TCA_CHOKE_STAB]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) return -EINVAL; if (ctl->limit > CHOKE_MAX_QUEUE) @@ -412,7 +413,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt, red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, - nla_data(tb[TCA_CHOKE_STAB]), + stab, max_P); red_set_vars(&q->vars); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index e0bc77533acc..f4132dc25ac0 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp, struct gred_sched *table = qdisc_priv(sch); struct gred_sched_data *q = table->tab[dp]; - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) { + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) { NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters"); return -EINVAL; } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index b4ae34d7aa96..40adf1f07a82 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -242,6 +242,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, unsigned char flags; int err; u32 max_P; + u8 *stab; if (tb[TCA_RED_PARMS] == NULL || tb[TCA_RED_STAB] == NULL) @@ -250,7 +251,9 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0; ctl = nla_data(tb[TCA_RED_PARMS]); - if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) + stab = nla_data(tb[TCA_RED_STAB]); + if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, + ctl->Scell_log, stab)) return -EINVAL; err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, @@ -288,7 +291,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb, red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog, ctl->Scell_log, - nla_data(tb[TCA_RED_STAB]), + stab, max_P); red_set_vars(&q->vars); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index b25e51440623..066754a18569 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) } if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, - ctl_v1->Wlog, ctl_v1->Scell_log)) + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) return -EINVAL; if (ctl_v1 && ctl_v1->qth_min) { p = kmalloc(sizeof(*p), GFP_KERNEL); diff --git a/net/sctp/output.c b/net/sctp/output.c index 6614c9fdc51e..a6aa17df09ef 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -584,13 +584,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) goto out; } - rcu_read_lock(); - if (__sk_dst_get(sk) != tp->dst) { - dst_hold(tp->dst); - sk_setup_caps(sk, tp->dst); - } - rcu_read_unlock(); - /* pack up chunks */ pkt_count = sctp_packet_pack(packet, head, gso, gfp); if (!pkt_count) { diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 3fd06a27105d..5cb1aa5f067b 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1135,6 +1135,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx, static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx) { + struct sock *sk = ctx->asoc->base.sk; struct list_head *ltransport; struct sctp_packet *packet; struct sctp_transport *t; @@ -1144,6 +1145,12 @@ static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx) t = list_entry(ltransport, struct sctp_transport, send_ready); packet = &t->packet; if (!sctp_packet_empty(packet)) { + rcu_read_lock(); + if (t->dst && __sk_dst_get(sk) != t->dst) { + dst_hold(t->dst); + sk_setup_caps(sk, t->dst); + } + rcu_read_unlock(); error = sctp_packet_transmit(packet, ctx->gfp); if (error < 0) ctx->q->asoc->base.sk->sk_err = -error; diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cf702a5f7fe5..39ed0e0afe6d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -963,8 +963,11 @@ void rpc_execute(struct rpc_task *task) rpc_set_active(task); rpc_make_runnable(rpciod_workqueue, task); - if (!is_async) + if (!is_async) { + unsigned int pflags = memalloc_nofs_save(); __rpc_execute(task); + memalloc_nofs_restore(pflags); + } } static void rpc_async_schedule(struct work_struct *work) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 4187745887f0..7034b4755fa1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1413,7 +1413,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) sendit: if (svc_authorise(rqstp)) - goto close; + goto close_xprt; return 1; /* Caller can now send it */ release_dropit: @@ -1425,6 +1425,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) return 0; close: + svc_authorise(rqstp); +close_xprt: if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) svc_close_xprt(rqstp->rq_xprt); dprintk("svc: svc_process close\n"); @@ -1433,7 +1435,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) err_short_len: svc_printk(rqstp, "short len %zd, dropping request\n", argv->iov_len); - goto close; + goto close_xprt; err_bad_rpc: serv->sv_stats->rpcbadfmt++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index dcc50ae54550..3cdd71a8df1e 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1060,7 +1060,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st struct svc_xprt *xprt; int ret = 0; - spin_lock(&serv->sv_lock); + spin_lock_bh(&serv->sv_lock); list_for_each_entry(xprt, xprt_list, xpt_list) { if (xprt->xpt_net != net) continue; @@ -1068,7 +1068,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); } - spin_unlock(&serv->sv_lock); + spin_unlock_bh(&serv->sv_lock); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index 63f8be974df2..8186ab6f99f1 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -252,9 +252,9 @@ xprt_setup_rdma_bc(struct xprt_create *args) xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); xprt_set_connected(xprt); - xprt->bind_timeout = RPCRDMA_BIND_TO; - xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; - xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO; + xprt->bind_timeout = 0; + xprt->reestablish_timeout = 0; + xprt->idle_timeout = 0; xprt->prot = XPRT_TRANSPORT_BC_RDMA; xprt->ops = &xprt_rdma_bc_procs; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index afba4e9d5425..c895f80df659 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -475,9 +475,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!svc_rdma_post_recvs(newxprt)) goto errout; - /* Swap out the handler */ - newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; - /* Construct RDMA-CM private message */ pmsg.cp_magic = rpcrdma_cmp_magic; pmsg.cp_version = RPCRDMA_CMP_VERSION; @@ -498,7 +495,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); + rdma_lock_handler(newxprt->sc_cm_id); + newxprt->sc_cm_id->event_handler = svc_rdma_cma_handler; ret = rdma_accept(newxprt->sc_cm_id, &conn_param); + rdma_unlock_handler(newxprt->sc_cm_id); if (ret) { trace_svcrdma_accept_err(newxprt, ret); goto errout; diff --git a/net/tipc/node.c b/net/tipc/node.c index 008670d1f43e..136338b85504 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2895,17 +2895,22 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, #ifdef CONFIG_TIPC_CRYPTO static int tipc_nl_retrieve_key(struct nlattr **attrs, - struct tipc_aead_key **key) + struct tipc_aead_key **pkey) { struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY]; + struct tipc_aead_key *key; if (!attr) return -ENODATA; - *key = (struct tipc_aead_key *)nla_data(attr); - if (nla_len(attr) < tipc_aead_key_size(*key)) + if (nla_len(attr) < sizeof(*key)) + return -EINVAL; + key = (struct tipc_aead_key *)nla_data(attr); + if (key->keylen > TIPC_AEAD_KEYLEN_MAX || + nla_len(attr) < tipc_aead_key_size(key)) return -EINVAL; + *pkey = key; return 0; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 5546710d8ac1..bc7fb9bf3351 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -755,6 +755,7 @@ static struct sock *__vsock_create(struct net *net, vsk->buffer_size = psk->buffer_size; vsk->buffer_min_size = psk->buffer_min_size; vsk->buffer_max_size = psk->buffer_max_size; + security_sk_clone(parent, sk); } else { vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN); vsk->owner = get_current_cred(); diff --git a/samples/Kconfig b/samples/Kconfig index 0ed6e4d71d87..e76cdfc50e25 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -210,7 +210,7 @@ config SAMPLE_WATCHDOG depends on CC_CAN_LINK config SAMPLE_WATCH_QUEUE - bool "Build example /dev/watch_queue notification consumer" + bool "Build example watch_queue notification API consumer" depends on CC_CAN_LINK && HEADERS_INSTALL help Build example userspace program to use the new mount_notify(), diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index db0cb73513a5..1e2a1105d0e6 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -1699,5 +1699,7 @@ int main(int argc, char **argv) xdpsock_cleanup(); + munmap(bufs, NUM_FRAMES * opt_xsk_frame_size); + return 0; } diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c index 46e618a897fe..8c6cb57d5cfc 100644 --- a/samples/watch_queue/watch_test.c +++ b/samples/watch_queue/watch_test.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Use /dev/watch_queue to watch for notifications. +/* Use watch_queue API to watch for notifications. * * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) diff --git a/scripts/dummy-tools/gcc b/scripts/dummy-tools/gcc index 5c113cad5601..0d0589cf8184 100755 --- a/scripts/dummy-tools/gcc +++ b/scripts/dummy-tools/gcc @@ -85,3 +85,8 @@ if arg_contain -print-file-name=plugin "$@"; then echo $plugin_dir exit 0 fi + +# inverted return value +if arg_contain -D__SIZEOF_INT128__=0 "$@"; then + exit 1 +fi diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index b9c2ee7ab43f..cce12e1971d8 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp) static int arm64_is_fake_mcount(Elf64_Rel const *rp) { - return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26; + return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26; } /* 64-bit EM_MIPS has weird ELF64_Rela.r_info. diff --git a/security/integrity/evm/evm_crypto.c b/security/integrity/evm/evm_crypto.c index 168c3b78ac47..a6dd47eb086d 100644 --- a/security/integrity/evm/evm_crypto.c +++ b/security/integrity/evm/evm_crypto.c @@ -73,7 +73,7 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) { long rc; const char *algo; - struct crypto_shash **tfm, *tmp_tfm; + struct crypto_shash **tfm, *tmp_tfm = NULL; struct shash_desc *desc; if (type == EVM_XATTR_HMAC) { @@ -118,13 +118,16 @@ static struct shash_desc *init_desc(char type, uint8_t hash_algo) alloc: desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm), GFP_KERNEL); - if (!desc) + if (!desc) { + crypto_free_shash(tmp_tfm); return ERR_PTR(-ENOMEM); + } desc->tfm = *tfm; rc = crypto_shash_init(desc); if (rc) { + crypto_free_shash(tmp_tfm); kfree(desc); return ERR_PTR(rc); } diff --git a/security/integrity/iint.c b/security/integrity/iint.c index 1d20003243c3..0ba01847e836 100644 --- a/security/integrity/iint.c +++ b/security/integrity/iint.c @@ -98,6 +98,14 @@ struct integrity_iint_cache *integrity_inode_get(struct inode *inode) struct rb_node *node, *parent = NULL; struct integrity_iint_cache *iint, *test_iint; + /* + * The integrity's "iint_cache" is initialized at security_init(), + * unless it is not included in the ordered list of LSMs enabled + * on the boot command line. + */ + if (!iint_cache) + panic("%s: lsm=integrity required.\n", __func__); + iint = integrity_iint_find(inode); if (iint) return iint; diff --git a/security/integrity/ima/ima_kexec.c b/security/integrity/ima/ima_kexec.c index 121de3e04af2..e29bea3dd4cc 100644 --- a/security/integrity/ima/ima_kexec.c +++ b/security/integrity/ima/ima_kexec.c @@ -119,6 +119,7 @@ void ima_add_kexec_buffer(struct kimage *image) ret = kexec_add_buffer(&kbuf); if (ret) { pr_err("Error passing over kexec measurement buffer.\n"); + vfree(kexec_buffer); return; } @@ -128,6 +129,8 @@ void ima_add_kexec_buffer(struct kimage *image) return; } + image->ima_buffer = kexec_buffer; + pr_debug("kexec measurement buffer for the loaded kernel at 0x%lx.\n", kbuf.mem); } diff --git a/security/integrity/ima/ima_mok.c b/security/integrity/ima/ima_mok.c index 36cadadbfba4..1e5c01916173 100644 --- a/security/integrity/ima/ima_mok.c +++ b/security/integrity/ima/ima_mok.c @@ -38,13 +38,12 @@ __init int ima_mok_init(void) (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ | KEY_USR_WRITE | KEY_USR_SEARCH, - KEY_ALLOC_NOT_IN_QUOTA, + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_SET_KEEP, restriction, NULL); if (IS_ERR(ima_blacklist_keyring)) panic("Can't allocate IMA blacklist keyring."); - - set_bit(KEY_FLAG_KEEP, &ima_blacklist_keyring->flags); return 0; } device_initcall(ima_mok_init); diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 83bc23409164..c161642a8484 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -119,7 +119,7 @@ config KEY_NOTIFICATIONS bool "Provide key/keyring change notifications" depends on KEYS && WATCH_QUEUE help - This option provides support for getting change notifications on keys - and keyrings on which the caller has View permission. This makes use - of the /dev/watch_queue misc device to handle the notification - buffer and provides KEYCTL_WATCH_KEY to enable/disable watches. + This option provides support for getting change notifications + on keys and keyrings on which the caller has View permission. + This makes use of pipes to handle the notification buffer and + provides KEYCTL_WATCH_KEY to enable/disable watches. diff --git a/security/keys/key.c b/security/keys/key.c index ebe752b137aa..c45afdd1dfbb 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -303,6 +303,8 @@ struct key *key_alloc(struct key_type *type, const char *desc, key->flags |= 1 << KEY_FLAG_BUILTIN; if (flags & KEY_ALLOC_UID_KEYRING) key->flags |= 1 << KEY_FLAG_UID_KEYRING; + if (flags & KEY_ALLOC_SET_KEEP) + key->flags |= 1 << KEY_FLAG_KEEP; #ifdef KEY_DEBUGGING key->magic = KEY_DEBUG_MAGIC; diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c index 74d82093cbaa..493eb91ed017 100644 --- a/security/keys/trusted-keys/trusted_tpm1.c +++ b/security/keys/trusted-keys/trusted_tpm1.c @@ -403,9 +403,12 @@ static int osap(struct tpm_buf *tb, struct osapsess *s, int ret; ret = tpm_get_random(chip, ononce, TPM_NONCE_SIZE); - if (ret != TPM_NONCE_SIZE) + if (ret < 0) return ret; + if (ret != TPM_NONCE_SIZE) + return -EIO; + tpm_buf_reset(tb, TPM_TAG_RQU_COMMAND, TPM_ORD_OSAP); tpm_buf_append_u16(tb, type); tpm_buf_append_u32(tb, handle); @@ -496,8 +499,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype, goto out; ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE); + if (ret < 0) + return ret; + if (ret != TPM_NONCE_SIZE) - goto out; + return -EIO; + ordinal = htonl(TPM_ORD_SEAL); datsize = htonl(datalen); pcrsize = htonl(pcrinfosize); @@ -601,9 +608,12 @@ static int tpm_unseal(struct tpm_buf *tb, ordinal = htonl(TPM_ORD_UNSEAL); ret = tpm_get_random(chip, nonceodd, TPM_NONCE_SIZE); + if (ret < 0) + return ret; + if (ret != TPM_NONCE_SIZE) { pr_info("trusted_key: tpm_get_random failed (%d)\n", ret); - return ret; + return -EIO; } ret = TSS_authhmac(authdata1, keyauth, TPM_NONCE_SIZE, enonce1, nonceodd, cont, sizeof(uint32_t), @@ -791,7 +801,7 @@ static int getoptions(char *c, struct trusted_key_payload *pay, case Opt_migratable: if (*args[0].from == '0') pay->migratable = 0; - else + else if (*args[0].from != '1') return -EINVAL; break; case Opt_pcrlock: @@ -1013,8 +1023,12 @@ static int trusted_instantiate(struct key *key, case Opt_new: key_len = payload->key_len; ret = tpm_get_random(chip, payload->key, key_len); + if (ret < 0) + goto out; + if (ret != key_len) { pr_info("trusted_key: key_create failed (%d)\n", ret); + ret = -EIO; goto out; } if (tpm2) diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c index 08ec7f48f01d..e2a0ed5d02f0 100644 --- a/security/keys/trusted-keys/trusted_tpm2.c +++ b/security/keys/trusted-keys/trusted_tpm2.c @@ -83,6 +83,12 @@ int tpm2_seal_trusted(struct tpm_chip *chip, if (rc) return rc; + rc = tpm_buf_init(&buf, TPM2_ST_SESSIONS, TPM2_CC_CREATE); + if (rc) { + tpm_put_ops(chip); + return rc; + } + tpm_buf_append_u32(&buf, options->keyhandle); tpm2_buf_append_auth(&buf, TPM2_RS_PW, NULL /* nonce */, 0, @@ -130,7 +136,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip, goto out; } - rc = tpm_send(chip, buf.data, tpm_buf_length(&buf)); + rc = tpm_transmit_cmd(chip, &buf, 4, "sealing data"); if (rc) goto out; @@ -157,6 +163,7 @@ int tpm2_seal_trusted(struct tpm_chip *chip, rc = -EPERM; } + tpm_put_ops(chip); return rc; } @@ -211,7 +218,7 @@ static int tpm2_load_cmd(struct tpm_chip *chip, goto out; } - rc = tpm_send(chip, buf.data, tpm_buf_length(&buf)); + rc = tpm_transmit_cmd(chip, &buf, 4, "loading blob"); if (!rc) *blob_handle = be32_to_cpup( (__be32 *) &buf.data[TPM_HEADER_SIZE]); @@ -260,7 +267,7 @@ static int tpm2_unseal_cmd(struct tpm_chip *chip, options->blobauth /* hmac */, TPM_DIGEST_SIZE); - rc = tpm_send(chip, buf.data, tpm_buf_length(&buf)); + rc = tpm_transmit_cmd(chip, &buf, 6, "unsealing"); if (rc > 0) rc = -EPERM; @@ -304,12 +311,19 @@ int tpm2_unseal_trusted(struct tpm_chip *chip, u32 blob_handle; int rc; - rc = tpm2_load_cmd(chip, payload, options, &blob_handle); + rc = tpm_try_get_ops(chip); if (rc) return rc; + rc = tpm2_load_cmd(chip, payload, options, &blob_handle); + if (rc) + goto out; + rc = tpm2_unseal_cmd(chip, payload, options, blob_handle); tpm2_flush_context(chip, blob_handle); +out: + tpm_put_ops(chip); + return rc; } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 644b17ec9e63..95a3c1eda9e4 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3413,6 +3413,10 @@ static int selinux_inode_setsecurity(struct inode *inode, const char *name, static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size) { const int len = sizeof(XATTR_NAME_SELINUX); + + if (!selinux_initialized(&selinux_state)) + return 0; + if (buffer && len <= buffer_size) memcpy(buffer, XATTR_NAME_SELINUX, len); return len; diff --git a/security/selinux/include/security.h b/security/selinux/include/security.h index 3cc8bab31ea8..63ca6e79daeb 100644 --- a/security/selinux/include/security.h +++ b/security/selinux/include/security.h @@ -219,14 +219,21 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void) return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]); } +struct selinux_policy_convert_data; + +struct selinux_load_state { + struct selinux_policy *policy; + struct selinux_policy_convert_data *convert_data; +}; + int security_mls_enabled(struct selinux_state *state); int security_load_policy(struct selinux_state *state, - void *data, size_t len, - struct selinux_policy **newpolicyp); + void *data, size_t len, + struct selinux_load_state *load_state); void selinux_policy_commit(struct selinux_state *state, - struct selinux_policy *newpolicy); + struct selinux_load_state *load_state); void selinux_policy_cancel(struct selinux_state *state, - struct selinux_policy *policy); + struct selinux_load_state *load_state); int security_read_policy(struct selinux_state *state, void **data, size_t *len); diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 4bde570d56a2..2b745ae8cb98 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -616,7 +616,7 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, { struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info; - struct selinux_policy *newpolicy; + struct selinux_load_state load_state; ssize_t length; void *data = NULL; @@ -642,23 +642,22 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf, if (copy_from_user(data, buf, count) != 0) goto out; - length = security_load_policy(fsi->state, data, count, &newpolicy); + length = security_load_policy(fsi->state, data, count, &load_state); if (length) { pr_warn_ratelimited("SELinux: failed to load policy\n"); goto out; } - length = sel_make_policy_nodes(fsi, newpolicy); + length = sel_make_policy_nodes(fsi, load_state.policy); if (length) { - selinux_policy_cancel(fsi->state, newpolicy); - goto out1; + selinux_policy_cancel(fsi->state, &load_state); + goto out; } - selinux_policy_commit(fsi->state, newpolicy); + selinux_policy_commit(fsi->state, &load_state); length = count; -out1: audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD, "auid=%u ses=%u lsm=selinux res=1", from_kuid(&init_user_ns, audit_get_loginuid(current)), diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 597b79703584..8d9bbd39ab9a 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -66,6 +66,17 @@ #include "audit.h" #include "policycap_names.h" +struct convert_context_args { + struct selinux_state *state; + struct policydb *oldp; + struct policydb *newp; +}; + +struct selinux_policy_convert_data { + struct convert_context_args args; + struct sidtab_convert_params sidtab_params; +}; + /* Forward declaration. */ static int context_struct_to_string(struct policydb *policydb, struct context *context, @@ -1973,12 +1984,6 @@ static inline int convert_context_handle_invalid_context( return 0; } -struct convert_context_args { - struct selinux_state *state; - struct policydb *oldp; - struct policydb *newp; -}; - /* * Convert the values in the security context * structure `oldc' from the values specified @@ -2158,7 +2163,7 @@ static void selinux_policy_cond_free(struct selinux_policy *policy) } void selinux_policy_cancel(struct selinux_state *state, - struct selinux_policy *policy) + struct selinux_load_state *load_state) { struct selinux_policy *oldpolicy; @@ -2166,7 +2171,8 @@ void selinux_policy_cancel(struct selinux_state *state, lockdep_is_held(&state->policy_mutex)); sidtab_cancel_convert(oldpolicy->sidtab); - selinux_policy_free(policy); + selinux_policy_free(load_state->policy); + kfree(load_state->convert_data); } static void selinux_notify_policy_change(struct selinux_state *state, @@ -2181,9 +2187,9 @@ static void selinux_notify_policy_change(struct selinux_state *state, } void selinux_policy_commit(struct selinux_state *state, - struct selinux_policy *newpolicy) + struct selinux_load_state *load_state) { - struct selinux_policy *oldpolicy; + struct selinux_policy *oldpolicy, *newpolicy = load_state->policy; u32 seqno; oldpolicy = rcu_dereference_protected(state->policy, @@ -2223,6 +2229,7 @@ void selinux_policy_commit(struct selinux_state *state, /* Free the old policy */ synchronize_rcu(); selinux_policy_free(oldpolicy); + kfree(load_state->convert_data); /* Notify others of the policy change */ selinux_notify_policy_change(state, seqno); @@ -2239,11 +2246,10 @@ void selinux_policy_commit(struct selinux_state *state, * loading the new policy. */ int security_load_policy(struct selinux_state *state, void *data, size_t len, - struct selinux_policy **newpolicyp) + struct selinux_load_state *load_state) { struct selinux_policy *newpolicy, *oldpolicy; - struct sidtab_convert_params convert_params; - struct convert_context_args args; + struct selinux_policy_convert_data *convert_data; int rc = 0; struct policy_file file = { data, len }, *fp = &file; @@ -2273,10 +2279,10 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len, goto err_mapping; } - if (!selinux_initialized(state)) { /* First policy load, so no need to preserve state from old policy */ - *newpolicyp = newpolicy; + load_state->policy = newpolicy; + load_state->convert_data = NULL; return 0; } @@ -2290,29 +2296,38 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len, goto err_free_isids; } + convert_data = kmalloc(sizeof(*convert_data), GFP_KERNEL); + if (!convert_data) { + rc = -ENOMEM; + goto err_free_isids; + } + /* * Convert the internal representations of contexts * in the new SID table. */ - args.state = state; - args.oldp = &oldpolicy->policydb; - args.newp = &newpolicy->policydb; + convert_data->args.state = state; + convert_data->args.oldp = &oldpolicy->policydb; + convert_data->args.newp = &newpolicy->policydb; - convert_params.func = convert_context; - convert_params.args = &args; - convert_params.target = newpolicy->sidtab; + convert_data->sidtab_params.func = convert_context; + convert_data->sidtab_params.args = &convert_data->args; + convert_data->sidtab_params.target = newpolicy->sidtab; - rc = sidtab_convert(oldpolicy->sidtab, &convert_params); + rc = sidtab_convert(oldpolicy->sidtab, &convert_data->sidtab_params); if (rc) { pr_err("SELinux: unable to convert the internal" " representation of contexts in the new SID" " table\n"); - goto err_free_isids; + goto err_free_convert_data; } - *newpolicyp = newpolicy; + load_state->policy = newpolicy; + load_state->convert_data = convert_data; return 0; +err_free_convert_data: + kfree(convert_data); err_free_isids: sidtab_destroy(newpolicy->sidtab); err_mapping: diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index 5d44b7d258ef..22ded2c26089 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -1167,7 +1167,7 @@ static ssize_t smk_write_net4addr(struct file *file, const char __user *buf, return -EPERM; if (*ppos != 0) return -EINVAL; - if (count < SMK_NETLBLADDRMIN) + if (count < SMK_NETLBLADDRMIN || count > PAGE_SIZE - 1) return -EINVAL; data = memdup_user_nul(buf, count); @@ -1427,7 +1427,7 @@ static ssize_t smk_write_net6addr(struct file *file, const char __user *buf, return -EPERM; if (*ppos != 0) return -EINVAL; - if (count < SMK_NETLBLADDRMIN) + if (count < SMK_NETLBLADDRMIN || count > PAGE_SIZE - 1) return -EINVAL; data = memdup_user_nul(buf, count); @@ -1834,6 +1834,10 @@ static ssize_t smk_write_ambient(struct file *file, const char __user *buf, if (!smack_privileged(CAP_MAC_ADMIN)) return -EPERM; + /* Enough data must be present */ + if (count == 0 || count > PAGE_SIZE) + return -EINVAL; + data = memdup_user_nul(buf, count); if (IS_ERR(data)) return PTR_ERR(data); @@ -2005,6 +2009,9 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, if (!smack_privileged(CAP_MAC_ADMIN)) return -EPERM; + if (count > PAGE_SIZE) + return -EINVAL; + data = memdup_user_nul(buf, count); if (IS_ERR(data)) return PTR_ERR(data); @@ -2092,6 +2099,9 @@ static ssize_t smk_write_unconfined(struct file *file, const char __user *buf, if (!smack_privileged(CAP_MAC_ADMIN)) return -EPERM; + if (count > PAGE_SIZE) + return -EINVAL; + data = memdup_user_nul(buf, count); if (IS_ERR(data)) return PTR_ERR(data); @@ -2648,6 +2658,10 @@ static ssize_t smk_write_syslog(struct file *file, const char __user *buf, if (!smack_privileged(CAP_MAC_ADMIN)) return -EPERM; + /* Enough data must be present */ + if (count == 0 || count > PAGE_SIZE) + return -EINVAL; + data = memdup_user_nul(buf, count); if (IS_ERR(data)) return PTR_ERR(data); @@ -2740,10 +2754,13 @@ static ssize_t smk_write_relabel_self(struct file *file, const char __user *buf, return -EPERM; /* + * No partial write. * Enough data must be present. */ if (*ppos != 0) return -EINVAL; + if (count == 0 || count > PAGE_SIZE) + return -EINVAL; data = memdup_user_nul(buf, count); if (IS_ERR(data)) diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index 051f7297877c..1e6077568fde 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -362,14 +362,14 @@ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, { u16 * const a_perm = &container_of(a, struct tomoyo_path_acl, head) ->perm; - u16 perm = *a_perm; + u16 perm = READ_ONCE(*a_perm); const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } @@ -437,7 +437,7 @@ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, { u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl, head)->perm; - u8 perm = *a_perm; + u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_mkdev_acl, head) ->perm; @@ -445,7 +445,7 @@ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } @@ -517,14 +517,14 @@ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a, { u8 * const a_perm = &container_of(a, struct tomoyo_path2_acl, head) ->perm; - u8 perm = *a_perm; + u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path2_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } @@ -655,7 +655,7 @@ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, { u8 * const a_perm = &container_of(a, struct tomoyo_path_number_acl, head)->perm; - u8 perm = *a_perm; + u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_path_number_acl, head) ->perm; @@ -663,7 +663,7 @@ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } diff --git a/security/tomoyo/network.c b/security/tomoyo/network.c index f9ff121d7e1e..478f757ff843 100644 --- a/security/tomoyo/network.c +++ b/security/tomoyo/network.c @@ -233,14 +233,14 @@ static bool tomoyo_merge_inet_acl(struct tomoyo_acl_info *a, { u8 * const a_perm = &container_of(a, struct tomoyo_inet_acl, head)->perm; - u8 perm = *a_perm; + u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_inet_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } @@ -259,14 +259,14 @@ static bool tomoyo_merge_unix_acl(struct tomoyo_acl_info *a, { u8 * const a_perm = &container_of(a, struct tomoyo_unix_acl, head)->perm; - u8 perm = *a_perm; + u8 perm = READ_ONCE(*a_perm); const u8 b_perm = container_of(b, struct tomoyo_unix_acl, head)->perm; if (is_delete) perm &= ~b_perm; else perm |= b_perm; - *a_perm = perm; + WRITE_ONCE(*a_perm, perm); return !perm; } @@ -613,7 +613,7 @@ static int tomoyo_check_unix_address(struct sockaddr *addr, static bool tomoyo_kernel_service(void) { /* Nothing to do if I am a kernel service. */ - return uaccess_kernel(); + return (current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD; } /** diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 176b803ebcfc..e89cac913583 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1058,30 +1058,30 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (ptr->is_deleted) continue; + /* + * Reading perm bitmap might race with tomoyo_merge_*() because + * caller does not hold tomoyo_policy_lock mutex. But exceeding + * max_learning_entry parameter by a few entries does not harm. + */ switch (ptr->type) { case TOMOYO_TYPE_PATH_ACL: - perm = container_of(ptr, struct tomoyo_path_acl, head) - ->perm; + data_race(perm = container_of(ptr, struct tomoyo_path_acl, head)->perm); break; case TOMOYO_TYPE_PATH2_ACL: - perm = container_of(ptr, struct tomoyo_path2_acl, head) - ->perm; + data_race(perm = container_of(ptr, struct tomoyo_path2_acl, head)->perm); break; case TOMOYO_TYPE_PATH_NUMBER_ACL: - perm = container_of(ptr, struct tomoyo_path_number_acl, - head)->perm; + data_race(perm = container_of(ptr, struct tomoyo_path_number_acl, head) + ->perm); break; case TOMOYO_TYPE_MKDEV_ACL: - perm = container_of(ptr, struct tomoyo_mkdev_acl, - head)->perm; + data_race(perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm); break; case TOMOYO_TYPE_INET_ACL: - perm = container_of(ptr, struct tomoyo_inet_acl, - head)->perm; + data_race(perm = container_of(ptr, struct tomoyo_inet_acl, head)->perm); break; case TOMOYO_TYPE_UNIX_ACL: - perm = container_of(ptr, struct tomoyo_unix_acl, - head)->perm; + data_race(perm = container_of(ptr, struct tomoyo_unix_acl, head)->perm); break; case TOMOYO_TYPE_MANUAL_TASK_ACL: perm = 0; diff --git a/sound/core/init.c b/sound/core/init.c index 75aec71c48a8..cc8208df26f3 100644 --- a/sound/core/init.c +++ b/sound/core/init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -416,6 +417,9 @@ int snd_card_disconnect(struct snd_card *card) /* notify all devices that we are disconnected */ snd_device_disconnect_all(card); + if (card->sync_irq > 0) + synchronize_irq(card->sync_irq); + snd_info_card_disconnect(card); if (card->registered) { device_del(&card->card_dev); diff --git a/sound/core/pcm.c b/sound/core/pcm.c index be5714f1bb58..41cbdac5b1cf 100644 --- a/sound/core/pcm.c +++ b/sound/core/pcm.c @@ -1111,6 +1111,10 @@ static int snd_pcm_dev_disconnect(struct snd_device *device) } } + for (cidx = 0; cidx < 2; cidx++) + for (substream = pcm->streams[cidx].substream; substream; substream = substream->next) + snd_pcm_sync_stop(substream, false); + pcm_call_notify(pcm, n_disconnect); for (cidx = 0; cidx < 2; cidx++) { snd_unregister_device(&pcm->streams[cidx].dev); diff --git a/sound/core/pcm_local.h b/sound/core/pcm_local.h index 17a1a5d87098..b3e8be5aeafb 100644 --- a/sound/core/pcm_local.h +++ b/sound/core/pcm_local.h @@ -63,6 +63,7 @@ static inline void snd_pcm_timer_done(struct snd_pcm_substream *substream) {} void __snd_pcm_xrun(struct snd_pcm_substream *substream); void snd_pcm_group_init(struct snd_pcm_group *group); +void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq); #ifdef CONFIG_SND_DMA_SGBUF struct page *snd_pcm_sgbuf_ops_page(struct snd_pcm_substream *substream, diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index c4aac703dc22..c6f65ee8142d 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -583,13 +583,13 @@ static inline void snd_pcm_timer_notify(struct snd_pcm_substream *substream, #endif } -static void snd_pcm_sync_stop(struct snd_pcm_substream *substream) +void snd_pcm_sync_stop(struct snd_pcm_substream *substream, bool sync_irq) { - if (substream->runtime->stop_operating) { + if (substream->runtime && substream->runtime->stop_operating) { substream->runtime->stop_operating = false; - if (substream->ops->sync_stop) + if (substream->ops && substream->ops->sync_stop) substream->ops->sync_stop(substream); - else if (substream->pcm->card->sync_irq > 0) + else if (sync_irq && substream->pcm->card->sync_irq > 0) synchronize_irq(substream->pcm->card->sync_irq); } } @@ -686,7 +686,7 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream, if (atomic_read(&substream->mmap_count)) return -EBADFD; - snd_pcm_sync_stop(substream); + snd_pcm_sync_stop(substream, true); params->rmask = ~0U; err = snd_pcm_hw_refine(substream, params); @@ -809,7 +809,7 @@ static int do_hw_free(struct snd_pcm_substream *substream) { int result = 0; - snd_pcm_sync_stop(substream); + snd_pcm_sync_stop(substream, true); if (substream->ops->hw_free) result = substream->ops->hw_free(substream); if (substream->managed_buffer_alloc) @@ -1421,8 +1421,10 @@ static int snd_pcm_do_stop(struct snd_pcm_substream *substream, snd_pcm_state_t state) { if (substream->runtime->trigger_master == substream && - snd_pcm_running(substream)) + snd_pcm_running(substream)) { substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_STOP); + substream->runtime->stop_operating = true; + } return 0; /* unconditonally stop all substreams */ } @@ -1435,7 +1437,6 @@ static void snd_pcm_post_stop(struct snd_pcm_substream *substream, runtime->status->state = state; snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MSTOP); } - runtime->stop_operating = true; wake_up(&runtime->sleep); wake_up(&runtime->tsleep); } @@ -1615,6 +1616,7 @@ static int snd_pcm_do_suspend(struct snd_pcm_substream *substream, if (! snd_pcm_running(substream)) return 0; substream->ops->trigger(substream, SNDRV_PCM_TRIGGER_SUSPEND); + runtime->stop_operating = true; return 0; /* suspend unconditionally */ } @@ -1691,6 +1693,12 @@ int snd_pcm_suspend_all(struct snd_pcm *pcm) return err; } } + + for (stream = 0; stream < 2; stream++) + for (substream = pcm->streams[stream].substream; + substream; substream = substream->next) + snd_pcm_sync_stop(substream, false); + return 0; } EXPORT_SYMBOL(snd_pcm_suspend_all); @@ -1736,7 +1744,6 @@ static void snd_pcm_post_resume(struct snd_pcm_substream *substream, snd_pcm_trigger_tstamp(substream); runtime->status->state = runtime->status->suspended_state; snd_pcm_timer_notify(substream, SNDRV_TIMER_EVENT_MRESUME); - snd_pcm_sync_stop(substream); } static const struct action_ops snd_pcm_action_resume = { @@ -1866,7 +1873,7 @@ static int snd_pcm_do_prepare(struct snd_pcm_substream *substream, snd_pcm_state_t state) { int err; - snd_pcm_sync_stop(substream); + snd_pcm_sync_stop(substream, true); err = substream->ops->prepare(substream); if (err < 0) return err; diff --git a/sound/firewire/dice/dice-stream.c b/sound/firewire/dice/dice-stream.c index 8e0c0380b4c4..1a14c083e8ce 100644 --- a/sound/firewire/dice/dice-stream.c +++ b/sound/firewire/dice/dice-stream.c @@ -493,11 +493,10 @@ void snd_dice_stream_stop_duplex(struct snd_dice *dice) struct reg_params tx_params, rx_params; if (dice->substreams_counter == 0) { - if (get_register_params(dice, &tx_params, &rx_params) >= 0) { - amdtp_domain_stop(&dice->domain); + if (get_register_params(dice, &tx_params, &rx_params) >= 0) finish_session(dice, &tx_params, &rx_params); - } + amdtp_domain_stop(&dice->domain); release_resources(dice); } } diff --git a/sound/firewire/fireface/ff-protocol-latter.c b/sound/firewire/fireface/ff-protocol-latter.c index 8d3b23778eb2..7ddb7b97f02d 100644 --- a/sound/firewire/fireface/ff-protocol-latter.c +++ b/sound/firewire/fireface/ff-protocol-latter.c @@ -15,6 +15,61 @@ #define LATTER_FETCH_MODE 0xffff00000010ULL #define LATTER_SYNC_STATUS 0x0000801c0000ULL +// The content of sync status register differs between models. +// +// Fireface UCX: +// 0xf0000000: (unidentified) +// 0x0f000000: effective rate of sampling clock +// 0x00f00000: detected rate of word clock on BNC interface +// 0x000f0000: detected rate of ADAT or S/PDIF on optical interface +// 0x0000f000: detected rate of S/PDIF on coaxial interface +// 0x00000e00: effective source of sampling clock +// 0x00000e00: Internal +// 0x00000800: (unidentified) +// 0x00000600: Word clock on BNC interface +// 0x00000400: ADAT on optical interface +// 0x00000200: S/PDIF on coaxial or optical interface +// 0x00000100: Optical interface is used for ADAT signal +// 0x00000080: (unidentified) +// 0x00000040: Synchronized to word clock on BNC interface +// 0x00000020: Synchronized to ADAT or S/PDIF on optical interface +// 0x00000010: Synchronized to S/PDIF on coaxial interface +// 0x00000008: (unidentified) +// 0x00000004: Lock word clock on BNC interface +// 0x00000002: Lock ADAT or S/PDIF on optical interface +// 0x00000001: Lock S/PDIF on coaxial interface +// +// Fireface 802 (and perhaps UFX): +// 0xf0000000: effective rate of sampling clock +// 0x0f000000: detected rate of ADAT-B on 2nd optical interface +// 0x00f00000: detected rate of ADAT-A on 1st optical interface +// 0x000f0000: detected rate of AES/EBU on XLR or coaxial interface +// 0x0000f000: detected rate of word clock on BNC interface +// 0x00000e00: effective source of sampling clock +// 0x00000e00: internal +// 0x00000800: ADAT-B +// 0x00000600: ADAT-A +// 0x00000400: AES/EBU +// 0x00000200: Word clock +// 0x00000080: Synchronized to ADAT-B on 2nd optical interface +// 0x00000040: Synchronized to ADAT-A on 1st optical interface +// 0x00000020: Synchronized to AES/EBU on XLR or 2nd optical interface +// 0x00000010: Synchronized to word clock on BNC interface +// 0x00000008: Lock ADAT-B on 2nd optical interface +// 0x00000004: Lock ADAT-A on 1st optical interface +// 0x00000002: Lock AES/EBU on XLR or 2nd optical interface +// 0x00000001: Lock word clock on BNC interface +// +// The pattern for rate bits: +// 0x00: 32.0 kHz +// 0x01: 44.1 kHz +// 0x02: 48.0 kHz +// 0x04: 64.0 kHz +// 0x05: 88.2 kHz +// 0x06: 96.0 kHz +// 0x08: 128.0 kHz +// 0x09: 176.4 kHz +// 0x0a: 192.0 kHz static int parse_clock_bits(u32 data, unsigned int *rate, enum snd_ff_clock_src *src, enum snd_ff_unit_version unit_version) @@ -23,35 +78,48 @@ static int parse_clock_bits(u32 data, unsigned int *rate, unsigned int rate; u32 flag; } *rate_entry, rate_entries[] = { - { 32000, 0x00000000, }, - { 44100, 0x01000000, }, - { 48000, 0x02000000, }, - { 64000, 0x04000000, }, - { 88200, 0x05000000, }, - { 96000, 0x06000000, }, - { 128000, 0x08000000, }, - { 176400, 0x09000000, }, - { 192000, 0x0a000000, }, + { 32000, 0x00, }, + { 44100, 0x01, }, + { 48000, 0x02, }, + { 64000, 0x04, }, + { 88200, 0x05, }, + { 96000, 0x06, }, + { 128000, 0x08, }, + { 176400, 0x09, }, + { 192000, 0x0a, }, }; static const struct { enum snd_ff_clock_src src; u32 flag; - } *clk_entry, clk_entries[] = { + } *clk_entry, *clk_entries, ucx_clk_entries[] = { { SND_FF_CLOCK_SRC_SPDIF, 0x00000200, }, { SND_FF_CLOCK_SRC_ADAT1, 0x00000400, }, { SND_FF_CLOCK_SRC_WORD, 0x00000600, }, { SND_FF_CLOCK_SRC_INTERNAL, 0x00000e00, }, + }, ufx_ff802_clk_entries[] = { + { SND_FF_CLOCK_SRC_WORD, 0x00000200, }, + { SND_FF_CLOCK_SRC_SPDIF, 0x00000400, }, + { SND_FF_CLOCK_SRC_ADAT1, 0x00000600, }, + { SND_FF_CLOCK_SRC_ADAT2, 0x00000800, }, + { SND_FF_CLOCK_SRC_INTERNAL, 0x00000e00, }, }; + u32 rate_bits; + unsigned int clk_entry_count; int i; - if (unit_version != SND_FF_UNIT_VERSION_UCX) { - // e.g. 0x00fe0f20 but expected 0x00eff002. - data = ((data & 0xf0f0f0f0) >> 4) | ((data & 0x0f0f0f0f) << 4); + if (unit_version == SND_FF_UNIT_VERSION_UCX) { + rate_bits = (data & 0x0f000000) >> 24; + clk_entries = ucx_clk_entries; + clk_entry_count = ARRAY_SIZE(ucx_clk_entries); + } else { + rate_bits = (data & 0xf0000000) >> 28; + clk_entries = ufx_ff802_clk_entries; + clk_entry_count = ARRAY_SIZE(ufx_ff802_clk_entries); } for (i = 0; i < ARRAY_SIZE(rate_entries); ++i) { rate_entry = rate_entries + i; - if ((data & 0x0f000000) == rate_entry->flag) { + if (rate_bits == rate_entry->flag) { *rate = rate_entry->rate; break; } @@ -59,14 +127,14 @@ static int parse_clock_bits(u32 data, unsigned int *rate, if (i == ARRAY_SIZE(rate_entries)) return -EIO; - for (i = 0; i < ARRAY_SIZE(clk_entries); ++i) { + for (i = 0; i < clk_entry_count; ++i) { clk_entry = clk_entries + i; if ((data & 0x000e00) == clk_entry->flag) { *src = clk_entry->src; break; } } - if (i == ARRAY_SIZE(clk_entries)) + if (i == clk_entry_count) return -EIO; return 0; @@ -249,16 +317,22 @@ static void latter_dump_status(struct snd_ff *ff, struct snd_info_buffer *buffer char *const label; u32 locked_mask; u32 synced_mask; - } *clk_entry, clk_entries[] = { + } *clk_entry, *clk_entries, ucx_clk_entries[] = { { "S/PDIF", 0x00000001, 0x00000010, }, { "ADAT", 0x00000002, 0x00000020, }, { "WDClk", 0x00000004, 0x00000040, }, + }, ufx_ff802_clk_entries[] = { + { "WDClk", 0x00000001, 0x00000010, }, + { "AES/EBU", 0x00000002, 0x00000020, }, + { "ADAT-A", 0x00000004, 0x00000040, }, + { "ADAT-B", 0x00000008, 0x00000080, }, }; __le32 reg; u32 data; unsigned int rate; enum snd_ff_clock_src src; const char *label; + unsigned int clk_entry_count; int i; int err; @@ -270,7 +344,15 @@ static void latter_dump_status(struct snd_ff *ff, struct snd_info_buffer *buffer snd_iprintf(buffer, "External source detection:\n"); - for (i = 0; i < ARRAY_SIZE(clk_entries); ++i) { + if (ff->unit_version == SND_FF_UNIT_VERSION_UCX) { + clk_entries = ucx_clk_entries; + clk_entry_count = ARRAY_SIZE(ucx_clk_entries); + } else { + clk_entries = ufx_ff802_clk_entries; + clk_entry_count = ARRAY_SIZE(ufx_ff802_clk_entries); + } + + for (i = 0; i < clk_entry_count; ++i) { clk_entry = clk_entries + i; snd_iprintf(buffer, "%s: ", clk_entry->label); if (data & clk_entry->locked_mask) { diff --git a/sound/hda/intel-nhlt.c b/sound/hda/intel-nhlt.c index 059aaf04f536..e2237239d922 100644 --- a/sound/hda/intel-nhlt.c +++ b/sound/hda/intel-nhlt.c @@ -31,18 +31,49 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) struct nhlt_endpoint *epnt; struct nhlt_dmic_array_config *cfg; struct nhlt_vendor_dmic_array_config *cfg_vendor; + struct nhlt_fmt *fmt_configs; unsigned int dmic_geo = 0; - u8 j; + u16 max_ch = 0; + u8 i, j; if (!nhlt) return 0; - epnt = (struct nhlt_endpoint *)nhlt->desc; + if (nhlt->header.length <= sizeof(struct acpi_table_header)) { + dev_warn(dev, "Invalid DMIC description table\n"); + return 0; + } + + for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++, + epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) { + + if (epnt->linktype != NHLT_LINK_DMIC) + continue; + + cfg = (struct nhlt_dmic_array_config *)(epnt->config.caps); + fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size); + + /* find max number of channels based on format_configuration */ + if (fmt_configs->fmt_count) { + dev_dbg(dev, "%s: found %d format definitions\n", + __func__, fmt_configs->fmt_count); + + for (i = 0; i < fmt_configs->fmt_count; i++) { + struct wav_fmt_ext *fmt_ext; + + fmt_ext = &fmt_configs->fmt_config[i].fmt_ext; - for (j = 0; j < nhlt->endpoint_count; j++) { - if (epnt->linktype == NHLT_LINK_DMIC) { - cfg = (struct nhlt_dmic_array_config *) - (epnt->config.caps); + if (fmt_ext->fmt.channels > max_ch) + max_ch = fmt_ext->fmt.channels; + } + dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch); + } else { + dev_dbg(dev, "%s: No format information found\n", __func__); + } + + if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) { + dmic_geo = max_ch; + } else { switch (cfg->array_type) { case NHLT_MIC_ARRAY_2CH_SMALL: case NHLT_MIC_ARRAY_2CH_BIG: @@ -59,13 +90,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt) dmic_geo = cfg_vendor->nb_mics; break; default: - dev_warn(dev, "undefined DMIC array_type 0x%0x\n", - cfg->array_type); + dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n", + __func__, cfg->array_type); + } + + if (dmic_geo > 0) { + dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo); + } + if (max_ch > dmic_geo) { + dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n", + __func__, max_ch, dmic_geo); } } - epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length); } + dev_dbg(dev, "%s: dmic number %d max_ch %d\n", + __func__, dmic_geo, max_ch); + return dmic_geo; } EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo); diff --git a/sound/pci/ctxfi/cthw20k2.c b/sound/pci/ctxfi/cthw20k2.c index fc1bc18caee9..85d1fc76f59e 100644 --- a/sound/pci/ctxfi/cthw20k2.c +++ b/sound/pci/ctxfi/cthw20k2.c @@ -991,7 +991,7 @@ static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf) if (idx < 4) { /* S/PDIF output */ - switch ((conf & 0x7)) { + switch ((conf & 0xf)) { case 1: set_field(&ctl->txctl[idx], ATXCTL_NUC, 0); break; diff --git a/sound/pci/hda/hda_bind.c b/sound/pci/hda/hda_bind.c index 6a8564566375..17a25e453f60 100644 --- a/sound/pci/hda/hda_bind.c +++ b/sound/pci/hda/hda_bind.c @@ -47,6 +47,10 @@ static void hda_codec_unsol_event(struct hdac_device *dev, unsigned int ev) if (codec->bus->shutdown) return; + /* ignore unsol events during system suspend/resume */ + if (codec->core.dev.power.power_state.event != PM_EVENT_ON) + return; + if (codec->patch_ops.unsol_event) codec->patch_ops.unsol_event(codec, ev); } diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c index 80016b7b6849..b972d59eb1ec 100644 --- a/sound/pci/hda/hda_controller.c +++ b/sound/pci/hda/hda_controller.c @@ -609,13 +609,6 @@ static int azx_pcm_open(struct snd_pcm_substream *substream) 20, 178000000); - /* by some reason, the playback stream stalls on PulseAudio with - * tsched=1 when a capture stream triggers. Until we figure out the - * real cause, disable tsched mode by telling the PCM info flag. - */ - if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND) - runtime->hw.info |= SNDRV_PCM_INFO_BATCH; - if (chip->align_buffer_size) /* constrain buffer sizes to be multiple of 128 bytes. This is more efficient in terms of memory diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index 8060cc86dfea..96903295a967 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4065,7 +4065,7 @@ static int add_micmute_led_hook(struct hda_codec *codec) spec->micmute_led.led_mode = MICMUTE_LED_FOLLOW_MUTE; spec->micmute_led.capture = 0; - spec->micmute_led.led_value = 0; + spec->micmute_led.led_value = -1; spec->micmute_led.old_hook = spec->cap_sync_hook; spec->cap_sync_hook = update_micmute_led; if (!snd_hda_gen_add_kctl(spec, NULL, &micmute_led_mode_ctl)) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 5a50d3a46445..253d538251ae 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -1026,6 +1026,8 @@ static int azx_prepare(struct device *dev) chip = card->private_data; chip->pm_prepared = 1; + flush_work(&azx_bus(chip)->unsol_work); + /* HDA controller always requires different WAKEEN for runtime suspend * and system suspend, so don't use direct-complete here. */ @@ -2481,6 +2483,8 @@ static const struct pci_device_id azx_ids[] = { /* CometLake-H */ { PCI_DEVICE(0x8086, 0x06C8), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, + { PCI_DEVICE(0x8086, 0xf1c8), + .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, /* CometLake-S */ { PCI_DEVICE(0x8086, 0xa3f0), .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE}, diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 7e62aed172a9..65057a184559 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1309,6 +1309,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = { SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D), SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5), + SND_PCI_QUIRK(0x1102, 0x0191, "Sound Blaster AE-5 Plus", QUIRK_AE5), SND_PCI_QUIRK(0x1102, 0x0081, "Sound Blaster AE-7", QUIRK_AE7), {} }; diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index d49cc4409d59..a980a4eda51c 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -149,6 +149,21 @@ static int cx_auto_vmaster_mute_led(struct led_classdev *led_cdev, return 0; } +static void cxt_init_gpio_led(struct hda_codec *codec) +{ + struct conexant_spec *spec = codec->spec; + unsigned int mask = spec->gpio_mute_led_mask | spec->gpio_mic_led_mask; + + if (mask) { + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK, + mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION, + mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_led); + } +} + static int cx_auto_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; @@ -156,6 +171,7 @@ static int cx_auto_init(struct hda_codec *codec) if (!spec->dynamic_eapd) cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, true); + cxt_init_gpio_led(codec); snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT); return 0; @@ -215,6 +231,7 @@ enum { CXT_FIXUP_HP_SPECTRE, CXT_FIXUP_HP_GATE_MIC, CXT_FIXUP_MUTE_LED_GPIO, + CXT_FIXUP_HP_ZBOOK_MUTE_LED, CXT_FIXUP_HEADSET_MIC, CXT_FIXUP_HP_MIC_NO_PRESENCE, }; @@ -654,31 +671,36 @@ static int cxt_gpio_micmute_update(struct led_classdev *led_cdev, return 0; } - -static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, - const struct hda_fixup *fix, int action) +static void cxt_setup_mute_led(struct hda_codec *codec, + unsigned int mute, unsigned int mic_mute) { struct conexant_spec *spec = codec->spec; - static const struct hda_verb gpio_init[] = { - { 0x01, AC_VERB_SET_GPIO_MASK, 0x03 }, - { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03 }, - {} - }; - if (action == HDA_FIXUP_ACT_PRE_PROBE) { + spec->gpio_led = 0; + spec->mute_led_polarity = 0; + if (mute) { snd_hda_gen_add_mute_led_cdev(codec, cxt_gpio_mute_update); - spec->gpio_led = 0; - spec->mute_led_polarity = 0; - spec->gpio_mute_led_mask = 0x01; - spec->gpio_mic_led_mask = 0x02; + spec->gpio_mute_led_mask = mute; + } + if (mic_mute) { snd_hda_gen_add_micmute_led_cdev(codec, cxt_gpio_micmute_update); + spec->gpio_mic_led_mask = mic_mute; } - snd_hda_add_verbs(codec, gpio_init); - if (spec->gpio_led) - snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, - spec->gpio_led); } +static void cxt_fixup_mute_led_gpio(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + cxt_setup_mute_led(codec, 0x01, 0x02); +} + +static void cxt_fixup_hp_zbook_mute_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + if (action == HDA_FIXUP_ACT_PRE_PROBE) + cxt_setup_mute_led(codec, 0x10, 0x20); +} /* ThinkPad X200 & co with cxt5051 */ static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = { @@ -839,6 +861,10 @@ static const struct hda_fixup cxt_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_mute_led_gpio, }, + [CXT_FIXUP_HP_ZBOOK_MUTE_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = cxt_fixup_hp_zbook_mute_led, + }, [CXT_FIXUP_HEADSET_MIC] = { .type = HDA_FIXUP_FUNC, .v.func = cxt_fixup_headset_mic, @@ -917,6 +943,7 @@ static const struct snd_pci_quirk cxt5066_fixups[] = { SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8402, "HP ProBook 645 G4", CXT_FIXUP_MUTE_LED_GPIO), + SND_PCI_QUIRK(0x103c, 0x8427, "HP ZBook Studio G5", CXT_FIXUP_HP_ZBOOK_MUTE_LED), SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8456, "HP Z2 G4 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE), @@ -956,6 +983,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = { { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" }, { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" }, { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" }, + { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" }, { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" }, {} }; diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 97adff0cbcab..d6387106619f 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -2130,7 +2130,6 @@ static int hdmi_pcm_close(struct hda_pcm_stream *hinfo, goto unlock; } per_cvt = get_cvt(spec, cvt_idx); - snd_BUG_ON(!per_cvt->assigned); per_cvt->assigned = 0; hinfo->nid = 0; @@ -2473,6 +2472,18 @@ static void generic_hdmi_free(struct hda_codec *codec) } #ifdef CONFIG_PM +static int generic_hdmi_suspend(struct hda_codec *codec) +{ + struct hdmi_spec *spec = codec->spec; + int pin_idx; + + for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) { + struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx); + cancel_delayed_work_sync(&per_pin->work); + } + return 0; +} + static int generic_hdmi_resume(struct hda_codec *codec) { struct hdmi_spec *spec = codec->spec; @@ -2496,6 +2507,7 @@ static const struct hda_codec_ops generic_hdmi_patch_ops = { .build_controls = generic_hdmi_build_controls, .unsol_event = hdmi_unsol_event, #ifdef CONFIG_PM + .suspend = generic_hdmi_suspend, .resume = generic_hdmi_resume, #endif }; diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 290645516313..316b9b4ccb32 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -1905,6 +1905,7 @@ enum { ALC889_FIXUP_FRONT_HP_NO_PRESENCE, ALC889_FIXUP_VAIO_TT, ALC888_FIXUP_EEE1601, + ALC886_FIXUP_EAPD, ALC882_FIXUP_EAPD, ALC883_FIXUP_EAPD, ALC883_FIXUP_ACER_EAPD, @@ -2238,6 +2239,15 @@ static const struct hda_fixup alc882_fixups[] = { { } } }, + [ALC886_FIXUP_EAPD] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + /* change to EAPD mode */ + { 0x20, AC_VERB_SET_COEF_INDEX, 0x07 }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x0068 }, + { } + } + }, [ALC882_FIXUP_EAPD] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -2510,6 +2520,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x106b, 0x4a00, "Macbook 5,2", ALC889_FIXUP_MBA11_VREF), SND_PCI_QUIRK(0x1071, 0x8258, "Evesham Voyaeger", ALC882_FIXUP_EAPD), + SND_PCI_QUIRK(0x13fe, 0x1009, "Advantech MIT-W101", ALC886_FIXUP_EAPD), SND_PCI_QUIRK(0x1458, 0xa002, "Gigabyte EP45-DS3/Z87X-UD3H", ALC889_FIXUP_FRONT_HP_NO_PRESENCE), SND_PCI_QUIRK(0x1458, 0xa0b8, "Gigabyte AZ370-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1458, 0xa0cd, "Gigabyte X570 Aorus Master", ALC1220_FIXUP_CLEVO_P950), @@ -2521,6 +2532,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD), + SND_PCI_QUIRK(0x1462, 0xcc34, "MSI Godlike X570", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS), SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), @@ -4213,6 +4225,12 @@ static void alc_fixup_hp_gpio_led(struct hda_codec *codec, } } +static void alc236_fixup_hp_gpio_led(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + alc_fixup_hp_gpio_led(codec, action, 0x02, 0x01); +} + static void alc269_fixup_hp_gpio_led(struct hda_codec *codec, const struct hda_fixup *fix, int action) { @@ -4280,6 +4298,28 @@ static void alc280_fixup_hp_gpio4(struct hda_codec *codec, } } +/* HP Spectre x360 14 model needs a unique workaround for enabling the amp; + * it needs to toggle the GPIO0 once on and off at each time (bko#210633) + */ +static void alc245_fixup_hp_x360_amp(struct hda_codec *codec, + const struct hda_fixup *fix, int action) +{ + struct alc_spec *spec = codec->spec; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + spec->gpio_mask |= 0x01; + spec->gpio_dir |= 0x01; + break; + case HDA_FIXUP_ACT_INIT: + /* need to toggle GPIO to enable the amp */ + alc_update_gpio_data(codec, 0x01, true); + msleep(100); + alc_update_gpio_data(codec, 0x01, false); + break; + } +} + static void alc_update_coef_led(struct hda_codec *codec, struct alc_coef_led *led, bool polarity, bool on) @@ -6266,6 +6306,7 @@ enum { ALC280_FIXUP_HP_DOCK_PINS, ALC269_FIXUP_HP_DOCK_GPIO_MIC1_LED, ALC280_FIXUP_HP_9480M, + ALC245_FIXUP_HP_X360_AMP, ALC288_FIXUP_DELL_HEADSET_MODE, ALC288_FIXUP_DELL1_MIC_NO_PRESENCE, ALC288_FIXUP_DELL_XPS_13, @@ -6346,6 +6387,7 @@ enum { ALC294_FIXUP_ASUS_GX502_VERBS, ALC285_FIXUP_HP_GPIO_LED, ALC285_FIXUP_HP_MUTE_LED, + ALC236_FIXUP_HP_GPIO_LED, ALC236_FIXUP_HP_MUTE_LED, ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, ALC295_FIXUP_ASUS_MIC_NO_PRESENCE, @@ -6362,6 +6404,7 @@ enum { ALC269_FIXUP_LEMOTE_A1802, ALC269_FIXUP_LEMOTE_A190X, ALC256_FIXUP_INTEL_NUC8_RUGGED, + ALC256_FIXUP_INTEL_NUC10, ALC255_FIXUP_XIAOMI_HEADSET_MIC, ALC274_FIXUP_HP_MIC, ALC274_FIXUP_HP_HEADSET_MIC, @@ -6372,6 +6415,7 @@ enum { ALC236_FIXUP_DELL_AIO_HEADSET_MIC, ALC282_FIXUP_ACER_DISABLE_LINEOUT, ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST, + ALC256_FIXUP_ACER_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -6971,6 +7015,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc280_fixup_hp_9480m, }, + [ALC245_FIXUP_HP_X360_AMP] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc245_fixup_hp_x360_amp, + }, [ALC288_FIXUP_DELL_HEADSET_MODE] = { .type = HDA_FIXUP_FUNC, .v.func = alc_fixup_headset_mode_dell_alc288, @@ -7575,6 +7623,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc285_fixup_hp_mute_led, }, + [ALC236_FIXUP_HP_GPIO_LED] = { + .type = HDA_FIXUP_FUNC, + .v.func = alc236_fixup_hp_gpio_led, + }, [ALC236_FIXUP_HP_MUTE_LED] = { .type = HDA_FIXUP_FUNC, .v.func = alc236_fixup_hp_mute_led, @@ -7744,6 +7796,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE }, + [ALC256_FIXUP_INTEL_NUC10] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE + }, [ALC255_FIXUP_XIAOMI_HEADSET_MIC] = { .type = HDA_FIXUP_VERBS, .v.verbs = (const struct hda_verb[]) { @@ -7815,6 +7876,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE, }, + [ALC256_FIXUP_ACER_HEADSET_MIC] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */ + { 0x1a, 0x90a1092f }, /* use as internal mic */ + { } + }, + .chained = true, + .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7841,9 +7912,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS), SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), @@ -7983,8 +8056,12 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation", ALC285_FIXUP_HP_GPIO_AMP_INIT), SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP), + SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -8089,6 +8166,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC), + SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC), SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE), @@ -8178,11 +8256,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101), SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */ SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC), + SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC), SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE), SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802), SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X), SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED), + SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10), #if 0 /* Below is a quirk table taken from the old code. @@ -8357,6 +8438,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET, .name = "alc298-samsung-headphone"}, {.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"}, {.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"}, + {.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"}, {} }; #define ALC225_STANDARD_PINS \ diff --git a/sound/soc/codecs/ak4458.c b/sound/soc/codecs/ak4458.c index 472caad17012..85a1d00894a9 100644 --- a/sound/soc/codecs/ak4458.c +++ b/sound/soc/codecs/ak4458.c @@ -812,6 +812,7 @@ static const struct of_device_id ak4458_of_match[] = { { .compatible = "asahi-kasei,ak4497", .data = &ak4497_drvdata}, { }, }; +MODULE_DEVICE_TABLE(of, ak4458_of_match); static struct i2c_driver ak4458_i2c_driver = { .driver = { diff --git a/sound/soc/codecs/ak5558.c b/sound/soc/codecs/ak5558.c index 8a32b0139cb0..85bdd0534180 100644 --- a/sound/soc/codecs/ak5558.c +++ b/sound/soc/codecs/ak5558.c @@ -419,6 +419,7 @@ static const struct of_device_id ak5558_i2c_dt_ids[] __maybe_unused = { { .compatible = "asahi-kasei,ak5558"}, { } }; +MODULE_DEVICE_TABLE(of, ak5558_i2c_dt_ids); static struct i2c_driver ak5558_i2c_driver = { .driver = { diff --git a/sound/soc/codecs/cpcap.c b/sound/soc/codecs/cpcap.c index f046987ee4cd..c0425e3707d9 100644 --- a/sound/soc/codecs/cpcap.c +++ b/sound/soc/codecs/cpcap.c @@ -1264,12 +1264,12 @@ static int cpcap_voice_hw_params(struct snd_pcm_substream *substream, if (direction == SNDRV_PCM_STREAM_CAPTURE) { mask = 0x0000; - mask |= CPCAP_BIT_MIC1_RX_TIMESLOT0; - mask |= CPCAP_BIT_MIC1_RX_TIMESLOT1; - mask |= CPCAP_BIT_MIC1_RX_TIMESLOT2; - mask |= CPCAP_BIT_MIC2_TIMESLOT0; - mask |= CPCAP_BIT_MIC2_TIMESLOT1; - mask |= CPCAP_BIT_MIC2_TIMESLOT2; + mask |= BIT(CPCAP_BIT_MIC1_RX_TIMESLOT0); + mask |= BIT(CPCAP_BIT_MIC1_RX_TIMESLOT1); + mask |= BIT(CPCAP_BIT_MIC1_RX_TIMESLOT2); + mask |= BIT(CPCAP_BIT_MIC2_TIMESLOT0); + mask |= BIT(CPCAP_BIT_MIC2_TIMESLOT1); + mask |= BIT(CPCAP_BIT_MIC2_TIMESLOT2); val = 0x0000; if (channels >= 2) val = BIT(CPCAP_BIT_MIC1_RX_TIMESLOT0); diff --git a/sound/soc/codecs/cs42l56.c b/sound/soc/codecs/cs42l56.c index bb9599cc832b..c44a5cdb796e 100644 --- a/sound/soc/codecs/cs42l56.c +++ b/sound/soc/codecs/cs42l56.c @@ -1250,6 +1250,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, dev_err(&i2c_client->dev, "CS42L56 Device ID (%X). Expected %X\n", devid, CS42L56_DEVID); + ret = -EINVAL; goto err_enable; } alpha_rev = reg & CS42L56_AREV_MASK; @@ -1307,7 +1308,7 @@ static int cs42l56_i2c_probe(struct i2c_client *i2c_client, ret = devm_snd_soc_register_component(&i2c_client->dev, &soc_component_dev_cs42l56, &cs42l56_dai, 1); if (ret < 0) - return ret; + goto err_enable; return 0; diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index 91e6890d6efc..3d6976a3d9e4 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -189,7 +189,6 @@ struct va_macro { struct device *dev; unsigned long active_ch_mask[VA_MACRO_MAX_DAIS]; unsigned long active_ch_cnt[VA_MACRO_MAX_DAIS]; - unsigned long active_decimator[VA_MACRO_MAX_DAIS]; u16 dmic_clk_div; int dec_mode[VA_MACRO_NUM_DECIMATORS]; @@ -549,11 +548,9 @@ static int va_macro_tx_mixer_put(struct snd_kcontrol *kcontrol, if (enable) { set_bit(dec_id, &va->active_ch_mask[dai_id]); va->active_ch_cnt[dai_id]++; - va->active_decimator[dai_id] = dec_id; } else { clear_bit(dec_id, &va->active_ch_mask[dai_id]); va->active_ch_cnt[dai_id]--; - va->active_decimator[dai_id] = -1; } snd_soc_dapm_mixer_update_power(widget->dapm, kcontrol, enable, update); @@ -880,18 +877,19 @@ static int va_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream) struct va_macro *va = snd_soc_component_get_drvdata(component); u16 tx_vol_ctl_reg, decimator; - decimator = va->active_decimator[dai->id]; - - tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL + - VA_MACRO_TX_PATH_OFFSET * decimator; - if (mute) - snd_soc_component_update_bits(component, tx_vol_ctl_reg, - CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, - CDC_VA_TX_PATH_PGA_MUTE_EN); - else - snd_soc_component_update_bits(component, tx_vol_ctl_reg, - CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, - CDC_VA_TX_PATH_PGA_MUTE_DISABLE); + for_each_set_bit(decimator, &va->active_ch_mask[dai->id], + VA_MACRO_DEC_MAX) { + tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL + + VA_MACRO_TX_PATH_OFFSET * decimator; + if (mute) + snd_soc_component_update_bits(component, tx_vol_ctl_reg, + CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, + CDC_VA_TX_PATH_PGA_MUTE_EN); + else + snd_soc_component_update_bits(component, tx_vol_ctl_reg, + CDC_VA_TX_PATH_PGA_MUTE_EN_MASK, + CDC_VA_TX_PATH_PGA_MUTE_DISABLE); + } return 0; } diff --git a/sound/soc/codecs/lpass-wsa-macro.c b/sound/soc/codecs/lpass-wsa-macro.c index 25f1df214ca5..cd59aa439373 100644 --- a/sound/soc/codecs/lpass-wsa-macro.c +++ b/sound/soc/codecs/lpass-wsa-macro.c @@ -1214,14 +1214,16 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w, struct snd_kcontrol *kcontrol, int event) { struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm); - u16 gain_reg; + u16 path_reg, gain_reg; int val; - switch (w->reg) { - case CDC_WSA_RX0_RX_PATH_MIX_CTL: + switch (w->shift) { + case WSA_MACRO_RX_MIX0: + path_reg = CDC_WSA_RX0_RX_PATH_MIX_CTL; gain_reg = CDC_WSA_RX0_RX_VOL_MIX_CTL; break; - case CDC_WSA_RX1_RX_PATH_MIX_CTL: + case WSA_MACRO_RX_MIX1: + path_reg = CDC_WSA_RX1_RX_PATH_MIX_CTL; gain_reg = CDC_WSA_RX1_RX_VOL_MIX_CTL; break; default: @@ -1234,7 +1236,7 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w, snd_soc_component_write(component, gain_reg, val); break; case SND_SOC_DAPM_POST_PMD: - snd_soc_component_update_bits(component, w->reg, + snd_soc_component_update_bits(component, path_reg, CDC_WSA_RX_PATH_MIX_CLK_EN_MASK, CDC_WSA_RX_PATH_MIX_CLK_DISABLE); break; @@ -2071,14 +2073,14 @@ static const struct snd_soc_dapm_widget wsa_macro_dapm_widgets[] = { SND_SOC_DAPM_MUX("WSA_RX0 INP0", SND_SOC_NOPM, 0, 0, &rx0_prim_inp0_mux), SND_SOC_DAPM_MUX("WSA_RX0 INP1", SND_SOC_NOPM, 0, 0, &rx0_prim_inp1_mux), SND_SOC_DAPM_MUX("WSA_RX0 INP2", SND_SOC_NOPM, 0, 0, &rx0_prim_inp2_mux), - SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", CDC_WSA_RX0_RX_PATH_MIX_CTL, - 0, 0, &rx0_mix_mux, wsa_macro_enable_mix_path, + SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX0, + 0, &rx0_mix_mux, wsa_macro_enable_mix_path, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_MUX("WSA_RX1 INP0", SND_SOC_NOPM, 0, 0, &rx1_prim_inp0_mux), SND_SOC_DAPM_MUX("WSA_RX1 INP1", SND_SOC_NOPM, 0, 0, &rx1_prim_inp1_mux), SND_SOC_DAPM_MUX("WSA_RX1 INP2", SND_SOC_NOPM, 0, 0, &rx1_prim_inp2_mux), - SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", CDC_WSA_RX1_RX_PATH_MIX_CTL, - 0, 0, &rx1_mix_mux, wsa_macro_enable_mix_path, + SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX1, + 0, &rx1_mix_mux, wsa_macro_enable_mix_path, SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD), SND_SOC_DAPM_MIXER_E("WSA_RX INT0 MIX", SND_SOC_NOPM, 0, 0, NULL, 0, diff --git a/sound/soc/codecs/max98373.c b/sound/soc/codecs/max98373.c index 31d571d4fac1..746c829312b8 100644 --- a/sound/soc/codecs/max98373.c +++ b/sound/soc/codecs/max98373.c @@ -190,7 +190,7 @@ static int max98373_feedback_get(struct snd_kcontrol *kcontrol, } } - return snd_soc_put_volsw(kcontrol, ucontrol); + return snd_soc_get_volsw(kcontrol, ucontrol); } static const struct snd_kcontrol_new max98373_snd_controls[] = { diff --git a/sound/soc/codecs/rt5682-i2c.c b/sound/soc/codecs/rt5682-i2c.c index 37d13120f5ba..93c1603b42f1 100644 --- a/sound/soc/codecs/rt5682-i2c.c +++ b/sound/soc/codecs/rt5682-i2c.c @@ -273,6 +273,9 @@ static void rt5682_i2c_shutdown(struct i2c_client *client) { struct rt5682_priv *rt5682 = i2c_get_clientdata(client); + cancel_delayed_work_sync(&rt5682->jack_detect_work); + cancel_delayed_work_sync(&rt5682->jd_check_work); + rt5682_reset(rt5682); } diff --git a/sound/soc/codecs/wcd934x.c b/sound/soc/codecs/wcd934x.c index 40f682f5dab8..d18ae5e3ee80 100644 --- a/sound/soc/codecs/wcd934x.c +++ b/sound/soc/codecs/wcd934x.c @@ -1873,6 +1873,12 @@ static int wcd934x_set_channel_map(struct snd_soc_dai *dai, wcd = snd_soc_component_get_drvdata(dai->component); + if (tx_num > WCD934X_TX_MAX || rx_num > WCD934X_RX_MAX) { + dev_err(wcd->dev, "Invalid tx %d or rx %d channel count\n", + tx_num, rx_num); + return -EINVAL; + } + if (!tx_slot || !rx_slot) { dev_err(wcd->dev, "Invalid tx_slot=%p, rx_slot=%p\n", tx_slot, rx_slot); diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 4530b74f5921..db87e07b11c9 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -640,6 +640,7 @@ static struct regmap_config wsa881x_regmap_config = { .val_bits = 8, .cache_type = REGCACHE_RBTREE, .reg_defaults = wsa881x_defaults, + .max_register = WSA881X_SPKR_STATUS3, .num_reg_defaults = ARRAY_SIZE(wsa881x_defaults), .volatile_reg = wsa881x_volatile_register, .readable_reg = wsa881x_readable_register, diff --git a/sound/soc/fsl/Kconfig b/sound/soc/fsl/Kconfig index 84db0b7b9d59..d7f30036d434 100644 --- a/sound/soc/fsl/Kconfig +++ b/sound/soc/fsl/Kconfig @@ -108,6 +108,7 @@ config SND_SOC_FSL_XCVR config SND_SOC_FSL_AUD2HTX tristate "AUDIO TO HDMI TX module support" depends on ARCH_MXC || COMPILE_TEST + select SND_SOC_IMX_PCM_DMA if SND_IMX_SOC != n help Say Y if you want to add AUDIO TO HDMI TX support for NXP. diff --git a/sound/soc/fsl/fsl_ssi.c b/sound/soc/fsl/fsl_ssi.c index 404be27c15fe..1d774c876c52 100644 --- a/sound/soc/fsl/fsl_ssi.c +++ b/sound/soc/fsl/fsl_ssi.c @@ -878,6 +878,7 @@ static int fsl_ssi_hw_free(struct snd_pcm_substream *substream, static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) { u32 strcr = 0, scr = 0, stcr, srcr, mask; + unsigned int slots; ssi->dai_fmt = fmt; @@ -909,10 +910,11 @@ static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt) return -EINVAL; } + slots = ssi->slots ? : 2; regmap_update_bits(ssi->regs, REG_SSI_STCCR, - SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); regmap_update_bits(ssi->regs, REG_SSI_SRCCR, - SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2)); + SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots)); /* Data on rising edge of bclk, frame low, 1clk before data */ strcr |= SSI_STCR_TFSI | SSI_STCR_TSCKP | SSI_STCR_TEFS; diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 5520d7c80019..21d2e1cba380 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -71,6 +71,7 @@ enum { #define BYT_RT5640_SSP0_AIF2 BIT(21) #define BYT_RT5640_MCLK_EN BIT(22) #define BYT_RT5640_MCLK_25MHZ BIT(23) +#define BYT_RT5640_NO_SPEAKERS BIT(24) #define BYTCR_INPUT_DEFAULTS \ (BYT_RT5640_IN3_MAP | \ @@ -132,6 +133,8 @@ static void log_quirks(struct device *dev) dev_info(dev, "quirk JD_NOT_INV enabled\n"); if (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) dev_info(dev, "quirk MONO_SPEAKER enabled\n"); + if (byt_rt5640_quirk & BYT_RT5640_NO_SPEAKERS) + dev_info(dev, "quirk NO_SPEAKERS enabled\n"); if (byt_rt5640_quirk & BYT_RT5640_DIFF_MIC) dev_info(dev, "quirk DIFF_MIC enabled\n"); if (byt_rt5640_quirk & BYT_RT5640_SSP0_AIF1) { @@ -399,6 +402,19 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF1 | BYT_RT5640_MCLK_EN), }, + { /* Acer One 10 S1002 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_NAME, "One S1002"), + }, + .driver_data = (void *)(BYT_RT5640_IN1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_SSP0_AIF2 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Acer"), @@ -524,6 +540,16 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_MONO_SPEAKER | BYT_RT5640_MCLK_EN), }, + { /* Estar Beauty HD MID 7316R */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Estar"), + DMI_MATCH(DMI_PRODUCT_NAME, "eSTAR BEAUTY HD Intel Quad core"), + }, + .driver_data = (void *)(BYTCR_INPUT_DEFAULTS | + BYT_RT5640_MONO_SPEAKER | + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, { .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), @@ -551,7 +577,7 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { }, .driver_data = (void *)(BYT_RT5640_DMIC1_MAP | BYT_RT5640_JD_SRC_JD1_IN4P | - BYT_RT5640_OVCD_TH_1500UA | + BYT_RT5640_OVCD_TH_2000UA | BYT_RT5640_OVCD_SF_0P75 | BYT_RT5640_MCLK_EN), }, @@ -798,6 +824,20 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { BYT_RT5640_SSP0_AIF2 | BYT_RT5640_MCLK_EN), }, + { /* Voyo Winpad A15 */ + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"), + DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"), + /* Above strings are too generic, also match on BIOS date */ + DMI_MATCH(DMI_BIOS_DATE, "11/20/2014"), + }, + .driver_data = (void *)(BYT_RT5640_IN1_MAP | + BYT_RT5640_JD_SRC_JD2_IN4N | + BYT_RT5640_OVCD_TH_2000UA | + BYT_RT5640_OVCD_SF_0P75 | + BYT_RT5640_DIFF_MIC | + BYT_RT5640_MCLK_EN), + }, { /* Catch-all for generic Insyde tablets, must be last */ .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Insyde"), @@ -946,7 +986,7 @@ static int byt_rt5640_init(struct snd_soc_pcm_runtime *runtime) ret = snd_soc_dapm_add_routes(&card->dapm, byt_rt5640_mono_spk_map, ARRAY_SIZE(byt_rt5640_mono_spk_map)); - } else { + } else if (!(byt_rt5640_quirk & BYT_RT5640_NO_SPEAKERS)) { ret = snd_soc_dapm_add_routes(&card->dapm, byt_rt5640_stereo_spk_map, ARRAY_SIZE(byt_rt5640_stereo_spk_map)); @@ -1188,6 +1228,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; static const char * const map_name[] = { "dmic1", "dmic2", "in1", "in3" }; + __maybe_unused const char *spk_type; const struct dmi_system_id *dmi_id; struct byt_rt5640_private *priv; struct snd_soc_acpi_mach *mach; @@ -1196,7 +1237,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) bool sof_parent; int ret_val = 0; int dai_index = 0; - int i; + int i, cfg_spk; is_bytcr = false; priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); @@ -1335,16 +1376,24 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) } } + if (byt_rt5640_quirk & BYT_RT5640_NO_SPEAKERS) { + cfg_spk = 0; + spk_type = "none"; + } else if (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) { + cfg_spk = 1; + spk_type = "mono"; + } else { + cfg_spk = 2; + spk_type = "stereo"; + } + snprintf(byt_rt5640_components, sizeof(byt_rt5640_components), - "cfg-spk:%s cfg-mic:%s", - (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) ? "1" : "2", + "cfg-spk:%d cfg-mic:%s", cfg_spk, map_name[BYT_RT5640_MAP(byt_rt5640_quirk)]); byt_rt5640_card.components = byt_rt5640_components; #if !IS_ENABLED(CONFIG_SND_SOC_INTEL_USER_FRIENDLY_LONG_NAMES) snprintf(byt_rt5640_long_name, sizeof(byt_rt5640_long_name), - "bytcr-rt5640-%s-spk-%s-mic", - (byt_rt5640_quirk & BYT_RT5640_MONO_SPEAKER) ? - "mono" : "stereo", + "bytcr-rt5640-%s-spk-%s-mic", spk_type, map_name[BYT_RT5640_MAP(byt_rt5640_quirk)]); byt_rt5640_card.long_name = byt_rt5640_long_name; #endif diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index f289ec8563a1..148b7b1bd3e8 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -435,6 +435,19 @@ static const struct dmi_system_id byt_rt5651_quirk_table[] = { BYT_RT5651_SSP0_AIF1 | BYT_RT5651_MONO_SPEAKER), }, + { + /* Jumper EZpad 7 */ + .callback = byt_rt5651_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Jumper"), + DMI_MATCH(DMI_PRODUCT_NAME, "EZpad"), + /* Jumper12x.WJ2012.bsBKRCP05 with the version dropped */ + DMI_MATCH(DMI_BIOS_VERSION, "Jumper12x.WJ2012.bsBKRCP"), + }, + .driver_data = (void *)(BYT_RT5651_DEFAULT_QUIRKS | + BYT_RT5651_IN2_MAP | + BYT_RT5651_JD_NOT_INV), + }, { /* KIANO SlimNote 14.2 */ .callback = byt_rt5651_quirk_cb, diff --git a/sound/soc/intel/boards/sof_sdw.c b/sound/soc/intel/boards/sof_sdw.c index 6d0d6ef711e0..1d7677376e74 100644 --- a/sound/soc/intel/boards/sof_sdw.c +++ b/sound/soc/intel/boards/sof_sdw.c @@ -48,34 +48,14 @@ static int sof_sdw_quirk_cb(const struct dmi_system_id *id) } static const struct dmi_system_id sof_sdw_quirk_table[] = { + /* CometLake devices */ { .callback = sof_sdw_quirk_cb, .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A32") - }, - .driver_data = (void *)(SOF_RT711_JD_SRC_JD2 | - SOF_RT715_DAI_ID_FIX | - SOF_SDW_FOUR_SPK), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A3E") - }, - .driver_data = (void *)(SOF_RT711_JD_SRC_JD2 | - SOF_RT715_DAI_ID_FIX), - }, - { - .callback = sof_sdw_quirk_cb, - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), - DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A5E") + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "CometLake Client"), }, - .driver_data = (void *)(SOF_RT711_JD_SRC_JD2 | - SOF_RT715_DAI_ID_FIX | - SOF_SDW_FOUR_SPK), + .driver_data = (void *)SOF_SDW_PCH_DMIC, }, { .callback = sof_sdw_quirk_cb, @@ -106,7 +86,7 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, - { + { .callback = sof_sdw_quirk_cb, .matches = { DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), @@ -116,6 +96,16 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, + /* IceLake devices */ + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), + DMI_MATCH(DMI_PRODUCT_NAME, "Ice Lake Client"), + }, + .driver_data = (void *)SOF_SDW_PCH_DMIC, + }, + /* TigerLake devices */ { .callback = sof_sdw_quirk_cb, .matches = { @@ -123,25 +113,31 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Tiger Lake Client Platform"), }, - .driver_data = (void *)(SOF_RT711_JD_SRC_JD1 | - SOF_SDW_TGL_HDMI | SOF_SDW_PCH_DMIC | - SOF_SSP_PORT(SOF_I2S_SSP2)), + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_RT711_JD_SRC_JD1 | + SOF_SDW_PCH_DMIC | + SOF_SSP_PORT(SOF_I2S_SSP2)), }, { .callback = sof_sdw_quirk_cb, .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "Ice Lake Client"), + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A3E") }, - .driver_data = (void *)SOF_SDW_PCH_DMIC, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_RT711_JD_SRC_JD2 | + SOF_RT715_DAI_ID_FIX), }, { .callback = sof_sdw_quirk_cb, .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "Intel Corporation"), - DMI_MATCH(DMI_PRODUCT_NAME, "CometLake Client"), + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A5E") }, - .driver_data = (void *)SOF_SDW_PCH_DMIC, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_RT711_JD_SRC_JD2 | + SOF_RT715_DAI_ID_FIX | + SOF_SDW_FOUR_SPK), }, { .callback = sof_sdw_quirk_cb, @@ -149,7 +145,8 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Google"), DMI_MATCH(DMI_PRODUCT_NAME, "Volteer"), }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | SOF_SDW_PCH_DMIC | + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | SOF_SDW_FOUR_SPK), }, { @@ -158,10 +155,38 @@ static const struct dmi_system_id sof_sdw_quirk_table[] = { DMI_MATCH(DMI_SYS_VENDOR, "Google"), DMI_MATCH(DMI_PRODUCT_NAME, "Ripto"), }, - .driver_data = (void *)(SOF_SDW_TGL_HDMI | SOF_SDW_PCH_DMIC | + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + SOF_SDW_FOUR_SPK), + }, + { + /* + * this entry covers multiple HP SKUs. The family name + * does not seem robust enough, so we use a partial + * match that ignores the product name suffix + * (e.g. 15-eb1xxx, 14t-ea000 or 13-aw2xxx) + */ + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "HP"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible"), + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_SDW_PCH_DMIC | + SOF_RT711_JD_SRC_JD2), + }, + /* TigerLake-SDCA devices */ + { + .callback = sof_sdw_quirk_cb, + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"), + DMI_EXACT_MATCH(DMI_PRODUCT_SKU, "0A32") + }, + .driver_data = (void *)(SOF_SDW_TGL_HDMI | + SOF_RT711_JD_SRC_JD2 | + SOF_RT715_DAI_ID_FIX | SOF_SDW_FOUR_SPK), }, - {} }; @@ -930,7 +955,7 @@ static int sof_card_dai_links_create(struct device *dev, ctx->idisp_codec = true; /* enable dmic01 & dmic16k */ - dmic_num = (sof_sdw_quirk & SOF_SDW_PCH_DMIC) ? 2 : 0; + dmic_num = (sof_sdw_quirk & SOF_SDW_PCH_DMIC || mach_params->dmic_num) ? 2 : 0; comp_num += dmic_num; dev_dbg(dev, "sdw %d, ssp %d, dmic %d, hdmi %d", sdw_be_num, ssp_num, diff --git a/sound/soc/intel/common/soc-intel-quirks.h b/sound/soc/intel/common/soc-intel-quirks.h index b07df3059926..a93987ab7f4d 100644 --- a/sound/soc/intel/common/soc-intel-quirks.h +++ b/sound/soc/intel/common/soc-intel-quirks.h @@ -11,6 +11,7 @@ #if IS_ENABLED(CONFIG_X86) +#include #include #include #include @@ -38,12 +39,36 @@ SOC_INTEL_IS_CPU(cml, KABYLAKE_L); static inline bool soc_intel_is_byt_cr(struct platform_device *pdev) { + /* + * List of systems which: + * 1. Use a non CR version of the Bay Trail SoC + * 2. Contain at least 6 interrupt resources so that the + * platform_get_resource(pdev, IORESOURCE_IRQ, 5) check below + * succeeds + * 3. Despite 1. and 2. still have their IPC IRQ at index 0 rather then 5 + * + * This needs to be here so that it can be shared between the SST and + * SOF drivers. We rely on the compiler to optimize this out in files + * where soc_intel_is_byt_cr is not used. + */ + static const struct dmi_system_id force_bytcr_table[] = { + { /* Lenovo Yoga Tablet 2 series */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_FAMILY, "YOGATablet2"), + }, + }, + {} + }; struct device *dev = &pdev->dev; int status = 0; if (!soc_intel_is_byt()) return false; + if (dmi_check_system(force_bytcr_table)) + return true; + if (iosf_mbi_available()) { u32 bios_status; diff --git a/sound/soc/qcom/lpass-apq8016.c b/sound/soc/qcom/lpass-apq8016.c index 8507ef8f6679..3efa133d1c64 100644 --- a/sound/soc/qcom/lpass-apq8016.c +++ b/sound/soc/qcom/lpass-apq8016.c @@ -250,7 +250,7 @@ static struct lpass_variant apq8016_data = { .micmode = REG_FIELD_ID(0x1000, 4, 7, 4, 0x1000), .micmono = REG_FIELD_ID(0x1000, 3, 3, 4, 0x1000), .wssrc = REG_FIELD_ID(0x1000, 2, 2, 4, 0x1000), - .bitwidth = REG_FIELD_ID(0x1000, 0, 0, 4, 0x1000), + .bitwidth = REG_FIELD_ID(0x1000, 0, 1, 4, 0x1000), .rdma_dyncclk = REG_FIELD_ID(0x8400, 12, 12, 2, 0x1000), .rdma_bursten = REG_FIELD_ID(0x8400, 11, 11, 2, 0x1000), diff --git a/sound/soc/qcom/lpass-cpu.c b/sound/soc/qcom/lpass-cpu.c index 66b834312f33..cd4fb77e9d51 100644 --- a/sound/soc/qcom/lpass-cpu.c +++ b/sound/soc/qcom/lpass-cpu.c @@ -286,16 +286,12 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, dev_err(dai->dev, "error writing to i2sctl reg: %d\n", ret); - if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_DISABLE) { - ret = clk_enable(drvdata->mi2s_bit_clk[id]); - if (ret) { - dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret); - clk_disable(drvdata->mi2s_osr_clk[id]); - return ret; - } - drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_ENABLE; + ret = clk_enable(drvdata->mi2s_bit_clk[id]); + if (ret) { + dev_err(dai->dev, "error in enabling mi2s bit clk: %d\n", ret); + clk_disable(drvdata->mi2s_osr_clk[id]); + return ret; } - break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: @@ -310,10 +306,9 @@ static int lpass_cpu_daiops_trigger(struct snd_pcm_substream *substream, if (ret) dev_err(dai->dev, "error writing to i2sctl reg: %d\n", ret); - if (drvdata->bit_clk_state[id] == LPAIF_BIT_CLK_ENABLE) { - clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]); - drvdata->bit_clk_state[id] = LPAIF_BIT_CLK_DISABLE; - } + + clk_disable(drvdata->mi2s_bit_clk[dai->driver->id]); + break; } @@ -599,7 +594,7 @@ static bool lpass_hdmi_regmap_writeable(struct device *dev, unsigned int reg) return true; } - for (i = 0; i < v->rdma_channels; ++i) { + for (i = 0; i < v->hdmi_rdma_channels; ++i) { if (reg == LPAIF_HDMI_RDMACTL_REG(v, i)) return true; if (reg == LPAIF_HDMI_RDMABASE_REG(v, i)) @@ -645,7 +640,7 @@ static bool lpass_hdmi_regmap_readable(struct device *dev, unsigned int reg) if (reg == LPASS_HDMITX_APP_IRQSTAT_REG(v)) return true; - for (i = 0; i < v->rdma_channels; ++i) { + for (i = 0; i < v->hdmi_rdma_channels; ++i) { if (reg == LPAIF_HDMI_RDMACTL_REG(v, i)) return true; if (reg == LPAIF_HDMI_RDMABASE_REG(v, i)) @@ -672,7 +667,7 @@ static bool lpass_hdmi_regmap_volatile(struct device *dev, unsigned int reg) if (reg == LPASS_HDMI_TX_LEGACY_ADDR(v)) return true; - for (i = 0; i < v->rdma_channels; ++i) { + for (i = 0; i < v->hdmi_rdma_channels; ++i) { if (reg == LPAIF_HDMI_RDMACURR_REG(v, i)) return true; } @@ -742,13 +737,12 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev, for_each_child_of_node(dev->of_node, node) { ret = of_property_read_u32(node, "reg", &id); - if (ret || id < 0 || id >= data->variant->num_dai) { + if (ret || id < 0) { dev_err(dev, "valid dai id not found: %d\n", ret); continue; } if (id == LPASS_DP_RX) { data->hdmi_port_enable = 1; - dev_err(dev, "HDMI Port is enabled: %d\n", id); } else { data->mi2s_playback_sd_mode[id] = of_lpass_cpu_parse_sd_lines(dev, node, @@ -822,7 +816,7 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) } lpass_hdmi_regmap_config.max_register = LPAIF_HDMI_RDMAPER_REG(variant, - variant->hdmi_rdma_channels); + variant->hdmi_rdma_channels - 1); drvdata->hdmiif_map = devm_regmap_init_mmio(dev, drvdata->hdmiif, &lpass_hdmi_regmap_config); if (IS_ERR(drvdata->hdmiif_map)) { @@ -866,7 +860,6 @@ int asoc_qcom_lpass_cpu_platform_probe(struct platform_device *pdev) PTR_ERR(drvdata->mi2s_bit_clk[dai_id])); return PTR_ERR(drvdata->mi2s_bit_clk[dai_id]); } - drvdata->bit_clk_state[dai_id] = LPAIF_BIT_CLK_DISABLE; } /* Allocation for i2sctl regmap fields */ diff --git a/sound/soc/qcom/lpass-lpaif-reg.h b/sound/soc/qcom/lpass-lpaif-reg.h index baf72f124ea9..2eb03ad9b7c7 100644 --- a/sound/soc/qcom/lpass-lpaif-reg.h +++ b/sound/soc/qcom/lpass-lpaif-reg.h @@ -60,9 +60,6 @@ #define LPAIF_I2SCTL_BITWIDTH_24 1 #define LPAIF_I2SCTL_BITWIDTH_32 2 -#define LPAIF_BIT_CLK_DISABLE 0 -#define LPAIF_BIT_CLK_ENABLE 1 - #define LPAIF_I2SCTL_RESET_STATE 0x003C0004 #define LPAIF_DMACTL_RESET_STATE 0x00200000 diff --git a/sound/soc/qcom/lpass-sc7180.c b/sound/soc/qcom/lpass-sc7180.c index 735c9dac28f2..8c168d3c589e 100644 --- a/sound/soc/qcom/lpass-sc7180.c +++ b/sound/soc/qcom/lpass-sc7180.c @@ -171,7 +171,7 @@ static struct lpass_variant sc7180_data = { .rdma_channels = 5, .hdmi_rdma_reg_base = 0x64000, .hdmi_rdma_reg_stride = 0x1000, - .hdmi_rdma_channels = 3, + .hdmi_rdma_channels = 4, .dmactl_audif_start = 1, .wrdma_reg_base = 0x18000, .wrdma_reg_stride = 0x1000, diff --git a/sound/soc/qcom/lpass.h b/sound/soc/qcom/lpass.h index 2d68af0da34d..83b2e08ade06 100644 --- a/sound/soc/qcom/lpass.h +++ b/sound/soc/qcom/lpass.h @@ -68,7 +68,6 @@ struct lpass_data { unsigned int mi2s_playback_sd_mode[LPASS_MAX_MI2S_PORTS]; unsigned int mi2s_capture_sd_mode[LPASS_MAX_MI2S_PORTS]; int hdmi_port_enable; - int bit_clk_state[LPASS_MAX_MI2S_PORTS]; /* low-power audio interface (LPAIF) registers */ void __iomem *lpaif; diff --git a/sound/soc/qcom/qdsp6/q6asm-dai.c b/sound/soc/qcom/qdsp6/q6asm-dai.c index c9ac9c1d26c4..9766725c2916 100644 --- a/sound/soc/qcom/qdsp6/q6asm-dai.c +++ b/sound/soc/qcom/qdsp6/q6asm-dai.c @@ -1233,6 +1233,25 @@ static void q6asm_dai_pcm_free(struct snd_soc_component *component, } } +static const struct snd_soc_dapm_widget q6asm_dapm_widgets[] = { + SND_SOC_DAPM_AIF_IN("MM_DL1", "MultiMedia1 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL2", "MultiMedia2 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL3", "MultiMedia3 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL4", "MultiMedia4 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL5", "MultiMedia5 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL6", "MultiMedia6 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL7", "MultiMedia7 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_IN("MM_DL8", "MultiMedia8 Playback", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL1", "MultiMedia1 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL2", "MultiMedia2 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL3", "MultiMedia3 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL4", "MultiMedia4 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL5", "MultiMedia5 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL6", "MultiMedia6 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL7", "MultiMedia7 Capture", 0, SND_SOC_NOPM, 0, 0), + SND_SOC_DAPM_AIF_OUT("MM_UL8", "MultiMedia8 Capture", 0, SND_SOC_NOPM, 0, 0), +}; + static const struct snd_soc_component_driver q6asm_fe_dai_component = { .name = DRV_NAME, .open = q6asm_dai_open, @@ -1245,6 +1264,8 @@ static const struct snd_soc_component_driver q6asm_fe_dai_component = { .pcm_construct = q6asm_dai_pcm_new, .pcm_destruct = q6asm_dai_pcm_free, .compress_ops = &q6asm_dai_compress_ops, + .dapm_widgets = q6asm_dapm_widgets, + .num_dapm_widgets = ARRAY_SIZE(q6asm_dapm_widgets), }; static struct snd_soc_dai_driver q6asm_fe_dais_template[] = { diff --git a/sound/soc/qcom/qdsp6/q6routing.c b/sound/soc/qcom/qdsp6/q6routing.c index 53185e26fea1..0a6b9433f6ac 100644 --- a/sound/soc/qcom/qdsp6/q6routing.c +++ b/sound/soc/qcom/qdsp6/q6routing.c @@ -713,24 +713,6 @@ static const struct snd_kcontrol_new mmul8_mixer_controls[] = { Q6ROUTING_TX_MIXERS(MSM_FRONTEND_DAI_MULTIMEDIA8) }; static const struct snd_soc_dapm_widget msm_qdsp6_widgets[] = { - /* Frontend AIF */ - SND_SOC_DAPM_AIF_IN("MM_DL1", "MultiMedia1 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL2", "MultiMedia2 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL3", "MultiMedia3 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL4", "MultiMedia4 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL5", "MultiMedia5 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL6", "MultiMedia6 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL7", "MultiMedia7 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_IN("MM_DL8", "MultiMedia8 Playback", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL1", "MultiMedia1 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL2", "MultiMedia2 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL3", "MultiMedia3 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL4", "MultiMedia4 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL5", "MultiMedia5 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL6", "MultiMedia6 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL7", "MultiMedia7 Capture", 0, 0, 0, 0), - SND_SOC_DAPM_AIF_OUT("MM_UL8", "MultiMedia8 Capture", 0, 0, 0, 0), - /* Mixer definitions */ SND_SOC_DAPM_MIXER("HDMI Mixer", SND_SOC_NOPM, 0, 0, hdmi_mixer_controls, diff --git a/sound/soc/qcom/sdm845.c b/sound/soc/qcom/sdm845.c index 6c2760e27ea6..153e9b2de0b5 100644 --- a/sound/soc/qcom/sdm845.c +++ b/sound/soc/qcom/sdm845.c @@ -27,18 +27,18 @@ #define SPK_TDM_RX_MASK 0x03 #define NUM_TDM_SLOTS 8 #define SLIM_MAX_TX_PORTS 16 -#define SLIM_MAX_RX_PORTS 16 +#define SLIM_MAX_RX_PORTS 13 #define WCD934X_DEFAULT_MCLK_RATE 9600000 struct sdm845_snd_data { struct snd_soc_jack jack; bool jack_setup; - bool stream_prepared[SLIM_MAX_RX_PORTS]; + bool stream_prepared[AFE_PORT_MAX]; struct snd_soc_card *card; uint32_t pri_mi2s_clk_count; uint32_t sec_mi2s_clk_count; uint32_t quat_tdm_clk_count; - struct sdw_stream_runtime *sruntime[SLIM_MAX_RX_PORTS]; + struct sdw_stream_runtime *sruntime[AFE_PORT_MAX]; }; static unsigned int tdm_slot_offset[8] = {0, 4, 8, 12, 16, 20, 24, 28}; diff --git a/sound/soc/sh/siu.h b/sound/soc/sh/siu.h index 6201840f1bc0..a675c36fc9d9 100644 --- a/sound/soc/sh/siu.h +++ b/sound/soc/sh/siu.h @@ -169,7 +169,7 @@ static inline u32 siu_read32(u32 __iomem *addr) #define SIU_BRGBSEL (0x108 / sizeof(u32)) #define SIU_BRRB (0x10c / sizeof(u32)) -extern struct snd_soc_component_driver siu_component; +extern const struct snd_soc_component_driver siu_component; extern struct siu_info *siu_i2s_data; int siu_init_port(int port, struct siu_port **port_info, struct snd_card *card); diff --git a/sound/soc/sh/siu_pcm.c b/sound/soc/sh/siu_pcm.c index 45c4320976ab..4785886df4f0 100644 --- a/sound/soc/sh/siu_pcm.c +++ b/sound/soc/sh/siu_pcm.c @@ -543,7 +543,7 @@ static void siu_pcm_free(struct snd_soc_component *component, dev_dbg(pcm->card->dev, "%s\n", __func__); } -struct const snd_soc_component_driver siu_component = { +const struct snd_soc_component_driver siu_component = { .name = DRV_NAME, .open = siu_pcm_open, .close = siu_pcm_close, diff --git a/sound/soc/sof/debug.c b/sound/soc/sof/debug.c index 30213a1beaaa..715a374b33cf 100644 --- a/sound/soc/sof/debug.c +++ b/sound/soc/sof/debug.c @@ -352,7 +352,7 @@ static ssize_t sof_dfsentry_write(struct file *file, const char __user *buffer, char *string; int ret; - string = kzalloc(count, GFP_KERNEL); + string = kzalloc(count+1, GFP_KERNEL); if (!string) return -ENOMEM; diff --git a/sound/soc/sof/intel/hda-dsp.c b/sound/soc/sof/intel/hda-dsp.c index 1c5e05b88a90..012bac41fee0 100644 --- a/sound/soc/sof/intel/hda-dsp.c +++ b/sound/soc/sof/intel/hda-dsp.c @@ -207,7 +207,7 @@ int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask) ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS, adspcs, - !(adspcs & HDA_DSP_ADSPCS_SPA_MASK(core_mask)), + !(adspcs & HDA_DSP_ADSPCS_CPA_MASK(core_mask)), HDA_DSP_REG_POLL_INTERVAL_US, HDA_DSP_PD_TIMEOUT * USEC_PER_MSEC); if (ret < 0) @@ -802,11 +802,15 @@ int hda_dsp_runtime_idle(struct snd_sof_dev *sdev) int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev) { + struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata; const struct sof_dsp_power_state target_state = { .state = SOF_DSP_PM_D3, }; int ret; + /* cancel any attempt for DSP D0I3 */ + cancel_delayed_work_sync(&hda->d0i3_work); + /* stop hda controller and power dsp off */ ret = hda_suspend(sdev, true); if (ret < 0) diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c index 509a9b256423..de6bc501f1b5 100644 --- a/sound/soc/sof/intel/hda.c +++ b/sound/soc/sof/intel/hda.c @@ -896,6 +896,7 @@ int hda_dsp_probe(struct snd_sof_dev *sdev) /* dsp_unmap: not currently used */ iounmap(sdev->bar[HDA_DSP_BAR]); hdac_bus_unmap: + platform_device_unregister(hdev->dmic_dev); iounmap(bus->remap_addr); hda_codec_i915_exit(sdev); err: diff --git a/sound/soc/sof/sof-pci-dev.c b/sound/soc/sof/sof-pci-dev.c index 215711ac7450..9adf50b20a73 100644 --- a/sound/soc/sof/sof-pci-dev.c +++ b/sound/soc/sof/sof-pci-dev.c @@ -65,6 +65,13 @@ static const struct dmi_system_id community_key_platforms[] = { DMI_MATCH(DMI_BOARD_NAME, "UP-APL01"), } }, + { + .ident = "Up Extreme", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "AAEON"), + DMI_MATCH(DMI_BOARD_NAME, "UP-WHL01"), + } + }, { .ident = "Google Chromebooks", .matches = { diff --git a/sound/usb/card.c b/sound/usb/card.c index e08fbf8e3ee0..3007922a8ed8 100644 --- a/sound/usb/card.c +++ b/sound/usb/card.c @@ -831,6 +831,9 @@ static int usb_audio_probe(struct usb_interface *intf, snd_media_device_create(chip, intf); } + if (quirk) + chip->quirk_type = quirk->type; + usb_chip[chip->index] = chip; chip->intf[chip->num_interfaces] = intf; chip->num_interfaces++; @@ -905,6 +908,9 @@ static void usb_audio_disconnect(struct usb_interface *intf) } } + if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND) + usb_enable_autosuspend(interface_to_usbdev(intf)); + chip->num_interfaces--; if (chip->num_interfaces <= 0) { usb_chip[chip->index] = NULL; diff --git a/sound/usb/card.h b/sound/usb/card.h index 37091b117614..a741e7da83a2 100644 --- a/sound/usb/card.h +++ b/sound/usb/card.h @@ -71,7 +71,7 @@ struct snd_usb_endpoint { unsigned char altsetting; /* corresponding alternate setting */ unsigned char ep_idx; /* endpoint array index */ - unsigned long flags; /* running bit flags */ + atomic_t state; /* running state */ void (*prepare_data_urb) (struct snd_usb_substream *subs, struct urb *urb); diff --git a/sound/usb/clock.c b/sound/usb/clock.c index dc68ed65e478..771b65232957 100644 --- a/sound/usb/clock.c +++ b/sound/usb/clock.c @@ -646,10 +646,10 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip, cur_rate = prev_rate; if (cur_rate != rate) { - usb_audio_warn(chip, - "%d:%d: freq mismatch (RO clock): req %d, clock runs @%d\n", - fmt->iface, fmt->altsetting, rate, cur_rate); - return -ENXIO; + usb_audio_dbg(chip, + "%d:%d: freq mismatch: req %d, clock runs @%d\n", + fmt->iface, fmt->altsetting, rate, cur_rate); + /* continue processing */ } validation: diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c index 8e568823c992..102d53515a76 100644 --- a/sound/usb/endpoint.c +++ b/sound/usb/endpoint.c @@ -21,8 +21,11 @@ #include "clock.h" #include "quirks.h" -#define EP_FLAG_RUNNING 1 -#define EP_FLAG_STOPPING 2 +enum { + EP_STATE_STOPPED, + EP_STATE_RUNNING, + EP_STATE_STOPPING, +}; /* interface refcounting */ struct snd_usb_iface_ref { @@ -115,6 +118,16 @@ static const char *usb_error_string(int err) } } +static inline bool ep_state_running(struct snd_usb_endpoint *ep) +{ + return atomic_read(&ep->state) == EP_STATE_RUNNING; +} + +static inline bool ep_state_update(struct snd_usb_endpoint *ep, int old, int new) +{ + return atomic_cmpxchg(&ep->state, old, new) == old; +} + /** * snd_usb_endpoint_implicit_feedback_sink: Report endpoint usage type * @@ -393,7 +406,7 @@ next_packet_fifo_dequeue(struct snd_usb_endpoint *ep) */ static void queue_pending_output_urbs(struct snd_usb_endpoint *ep) { - while (test_bit(EP_FLAG_RUNNING, &ep->flags)) { + while (ep_state_running(ep)) { unsigned long flags; struct snd_usb_packet_info *packet; @@ -454,13 +467,13 @@ static void snd_complete_urb(struct urb *urb) if (unlikely(atomic_read(&ep->chip->shutdown))) goto exit_clear; - if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + if (unlikely(!ep_state_running(ep))) goto exit_clear; if (usb_pipeout(ep->pipe)) { retire_outbound_urb(ep, ctx); /* can be stopped during retire callback */ - if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + if (unlikely(!ep_state_running(ep))) goto exit_clear; if (snd_usb_endpoint_implicit_feedback_sink(ep)) { @@ -474,12 +487,12 @@ static void snd_complete_urb(struct urb *urb) prepare_outbound_urb(ep, ctx); /* can be stopped during prepare callback */ - if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + if (unlikely(!ep_state_running(ep))) goto exit_clear; } else { retire_inbound_urb(ep, ctx); /* can be stopped during retire callback */ - if (unlikely(!test_bit(EP_FLAG_RUNNING, &ep->flags))) + if (unlikely(!ep_state_running(ep))) goto exit_clear; prepare_inbound_urb(ep, ctx); @@ -835,7 +848,7 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) unsigned long end_time = jiffies + msecs_to_jiffies(1000); int alive; - if (!test_bit(EP_FLAG_STOPPING, &ep->flags)) + if (atomic_read(&ep->state) != EP_STATE_STOPPING) return 0; do { @@ -850,10 +863,11 @@ static int wait_clear_urbs(struct snd_usb_endpoint *ep) usb_audio_err(ep->chip, "timeout: still %d active urbs on EP #%x\n", alive, ep->ep_num); - clear_bit(EP_FLAG_STOPPING, &ep->flags); - ep->sync_sink = NULL; - snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); + if (ep_state_update(ep, EP_STATE_STOPPING, EP_STATE_STOPPED)) { + ep->sync_sink = NULL; + snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); + } return 0; } @@ -868,26 +882,20 @@ void snd_usb_endpoint_sync_pending_stop(struct snd_usb_endpoint *ep) } /* - * Stop and unlink active urbs. + * Stop active urbs * - * This function checks and clears EP_FLAG_RUNNING state. - * When @wait_sync is set, it waits until all pending URBs are killed. + * This function moves the EP to STOPPING state if it's being RUNNING. */ -static int stop_and_unlink_urbs(struct snd_usb_endpoint *ep, bool force, - bool wait_sync) +static int stop_urbs(struct snd_usb_endpoint *ep, bool force) { unsigned int i; - if (!force && atomic_read(&ep->chip->shutdown)) /* to be sure... */ - return -EBADFD; - - if (atomic_read(&ep->running)) + if (!force && atomic_read(&ep->running)) return -EBUSY; - if (!test_and_clear_bit(EP_FLAG_RUNNING, &ep->flags)) - goto out; + if (!ep_state_update(ep, EP_STATE_RUNNING, EP_STATE_STOPPING)) + return 0; - set_bit(EP_FLAG_STOPPING, &ep->flags); INIT_LIST_HEAD(&ep->ready_playback_urbs); ep->next_packet_head = 0; ep->next_packet_queued = 0; @@ -901,24 +909,25 @@ static int stop_and_unlink_urbs(struct snd_usb_endpoint *ep, bool force, } } - out: - if (wait_sync) - return wait_clear_urbs(ep); return 0; } /* * release an endpoint's urbs */ -static void release_urbs(struct snd_usb_endpoint *ep, int force) +static int release_urbs(struct snd_usb_endpoint *ep, bool force) { - int i; + int i, err; /* route incoming urbs to nirvana */ snd_usb_endpoint_set_callback(ep, NULL, NULL, NULL); - /* stop urbs */ - stop_and_unlink_urbs(ep, force, true); + /* stop and unlink urbs */ + err = stop_urbs(ep, force); + if (err) + return err; + + wait_clear_urbs(ep); for (i = 0; i < ep->nurbs; i++) release_urb_ctx(&ep->urb[i]); @@ -928,6 +937,7 @@ static void release_urbs(struct snd_usb_endpoint *ep, int force) ep->syncbuf = NULL; ep->nurbs = 0; + return 0; } /* @@ -1118,7 +1128,7 @@ static int data_ep_set_params(struct snd_usb_endpoint *ep) return 0; out_of_memory: - release_urbs(ep, 0); + release_urbs(ep, false); return -ENOMEM; } @@ -1162,7 +1172,7 @@ static int sync_ep_set_params(struct snd_usb_endpoint *ep) return 0; out_of_memory: - release_urbs(ep, 0); + release_urbs(ep, false); return -ENOMEM; } @@ -1180,7 +1190,9 @@ static int snd_usb_endpoint_set_params(struct snd_usb_audio *chip, int err; /* release old buffers, if any */ - release_urbs(ep, 0); + err = release_urbs(ep, false); + if (err < 0) + return err; ep->datainterval = fmt->datainterval; ep->maxpacksize = fmt->maxpacksize; @@ -1360,7 +1372,8 @@ int snd_usb_endpoint_start(struct snd_usb_endpoint *ep) * from that context. */ - set_bit(EP_FLAG_RUNNING, &ep->flags); + if (!ep_state_update(ep, EP_STATE_STOPPED, EP_STATE_RUNNING)) + goto __error; if (snd_usb_endpoint_implicit_feedback_sink(ep)) { for (i = 0; i < ep->nurbs; i++) { @@ -1433,7 +1446,7 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) WRITE_ONCE(ep->sync_source->sync_sink, NULL); if (!atomic_dec_return(&ep->running)) - stop_and_unlink_urbs(ep, false, false); + stop_urbs(ep, false); } /** @@ -1446,12 +1459,12 @@ void snd_usb_endpoint_stop(struct snd_usb_endpoint *ep) */ void snd_usb_endpoint_release(struct snd_usb_endpoint *ep) { - release_urbs(ep, 1); + release_urbs(ep, true); } /** * snd_usb_endpoint_free_all: Free the resources of an snd_usb_endpoint - * @card: The chip + * @chip: The chip * * This free all endpoints and those resources */ diff --git a/sound/usb/implicit.c b/sound/usb/implicit.c index 521cc846d9d9..11a85e66aa96 100644 --- a/sound/usb/implicit.c +++ b/sound/usb/implicit.c @@ -73,6 +73,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { /* No quirk for playback but with capture quirk (see below) */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0130), /* BOSS BR-80 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0171), /* BOSS RC-505 */ + IMPLICIT_FB_SKIP_DEV(0x0582, 0x0185), /* BOSS GP-10 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x0189), /* BOSS GT-100v2 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d6), /* BOSS GT-1 */ IMPLICIT_FB_SKIP_DEV(0x0582, 0x01d8), /* BOSS Katana */ @@ -86,6 +87,7 @@ static const struct snd_usb_implicit_fb_match playback_implicit_fb_quirks[] = { static const struct snd_usb_implicit_fb_match capture_implicit_fb_quirks[] = { IMPLICIT_FB_FIXED_DEV(0x0582, 0x0130, 0x0d, 0x01), /* BOSS BR-80 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x0171, 0x0d, 0x01), /* BOSS RC-505 */ + IMPLICIT_FB_FIXED_DEV(0x0582, 0x0185, 0x0d, 0x01), /* BOSS GP-10 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x0189, 0x0d, 0x01), /* BOSS GT-100v2 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d6, 0x0d, 0x01), /* BOSS GT-1 */ IMPLICIT_FB_FIXED_DEV(0x0582, 0x01d8, 0x0d, 0x01), /* BOSS Katana */ @@ -302,7 +304,8 @@ static int audioformat_implicit_fb_quirk(struct snd_usb_audio *chip, /* Pioneer devices with vendor spec class */ if (attr == USB_ENDPOINT_SYNC_ASYNC && alts->desc.bInterfaceClass == USB_CLASS_VENDOR_SPEC && - USB_ID_VENDOR(chip->usb_id) == 0x2b73 /* Pioneer */) { + (USB_ID_VENDOR(chip->usb_id) == 0x2b73 || /* Pioneer */ + USB_ID_VENDOR(chip->usb_id) == 0x08e4 /* Pioneer */)) { if (skip_pioneer_sync_ep(chip, fmt, alts)) return 1; } diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 12b15ed59eaa..d5bdc9c4f452 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -1301,6 +1301,17 @@ static int get_min_max_with_quirks(struct usb_mixer_elem_info *cval, /* totally crap, return an error */ return -EINVAL; } + } else { + /* if the max volume is too low, it's likely a bogus range; + * here we use -96dB as the threshold + */ + if (cval->dBmax <= -9600) { + usb_audio_info(cval->head.mixer->chip, + "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n", + cval->head.id, mixer_ctrl_intf(cval->head.mixer), + cval->dBmin, cval->dBmax); + cval->dBmin = cval->dBmax = 0; + } } return 0; diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c index a7212f16660e..646deb6244b1 100644 --- a/sound/usb/mixer_maps.c +++ b/sound/usb/mixer_maps.c @@ -536,6 +536,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = { .id = USB_ID(0x05a7, 0x1020), .map = bose_companion5_map, }, + { + /* Corsair Virtuoso SE (wired mode) */ + .id = USB_ID(0x1b1c, 0x0a3d), + .map = corsair_virtuoso_map, + }, + { + /* Corsair Virtuoso SE (wireless mode) */ + .id = USB_ID(0x1b1c, 0x0a3e), + .map = corsair_virtuoso_map, + }, { /* Corsair Virtuoso (wired mode) */ .id = USB_ID(0x1b1c, 0x0a41), diff --git a/sound/usb/mixer_quirks.c b/sound/usb/mixer_quirks.c index df036a359f2f..5171b3dc1eb9 100644 --- a/sound/usb/mixer_quirks.c +++ b/sound/usb/mixer_quirks.c @@ -2603,141 +2603,251 @@ static int snd_bbfpro_controls_create(struct usb_mixer_interface *mixer) } /* - * Pioneer DJ DJM-250MK2 and maybe other DJM models + * Pioneer DJ DJM Mixers * - * For playback, no duplicate mapping should be set. - * There are three mixer stereo channels (CH1, CH2, AUX) - * and three stereo sources (Playback 1-2, Playback 3-4, Playback 5-6). - * Each channel should be mapped just once to one source. - * If mapped multiple times, only one source will play on given channel - * (sources are not mixed together). + * These devices generally have options for soft-switching the playback and + * capture sources in addition to the recording level. Although different + * devices have different configurations, there seems to be canonical values + * for specific capture/playback types: See the definitions of these below. * - * For recording, duplicate mapping is OK. We will get the same signal multiple times. - * - * Channels 7-8 are in both directions fixed to FX SEND / FX RETURN. - * - * See also notes in the quirks-table.h file. + * The wValue is masked with the stereo channel number. e.g. Setting Ch2 to + * capture phono would be 0x0203. Capture, playback and capture level have + * different wIndexes. */ -struct snd_pioneer_djm_option { - const u16 wIndex; - const u16 wValue; +// Capture types +#define SND_DJM_CAP_LINE 0x00 +#define SND_DJM_CAP_CDLINE 0x01 +#define SND_DJM_CAP_DIGITAL 0x02 +#define SND_DJM_CAP_PHONO 0x03 +#define SND_DJM_CAP_PFADER 0x06 +#define SND_DJM_CAP_XFADERA 0x07 +#define SND_DJM_CAP_XFADERB 0x08 +#define SND_DJM_CAP_MIC 0x09 +#define SND_DJM_CAP_AUX 0x0d +#define SND_DJM_CAP_RECOUT 0x0a +#define SND_DJM_CAP_NONE 0x0f +#define SND_DJM_CAP_CH1PFADER 0x11 +#define SND_DJM_CAP_CH2PFADER 0x12 +#define SND_DJM_CAP_CH3PFADER 0x13 +#define SND_DJM_CAP_CH4PFADER 0x14 + +// Playback types +#define SND_DJM_PB_CH1 0x00 +#define SND_DJM_PB_CH2 0x01 +#define SND_DJM_PB_AUX 0x04 + +#define SND_DJM_WINDEX_CAP 0x8002 +#define SND_DJM_WINDEX_CAPLVL 0x8003 +#define SND_DJM_WINDEX_PB 0x8016 + +// kcontrol->private_value layout +#define SND_DJM_VALUE_MASK 0x0000ffff +#define SND_DJM_GROUP_MASK 0x00ff0000 +#define SND_DJM_DEVICE_MASK 0xff000000 +#define SND_DJM_GROUP_SHIFT 16 +#define SND_DJM_DEVICE_SHIFT 24 + +// device table index +#define SND_DJM_250MK2_IDX 0x0 +#define SND_DJM_750_IDX 0x1 +#define SND_DJM_900NXS2_IDX 0x2 + + +#define SND_DJM_CTL(_name, suffix, _default_value, _windex) { \ + .name = _name, \ + .options = snd_djm_opts_##suffix, \ + .noptions = ARRAY_SIZE(snd_djm_opts_##suffix), \ + .default_value = _default_value, \ + .wIndex = _windex } + +#define SND_DJM_DEVICE(suffix) { \ + .controls = snd_djm_ctls_##suffix, \ + .ncontrols = ARRAY_SIZE(snd_djm_ctls_##suffix) } + + +struct snd_djm_device { const char *name; + const struct snd_djm_ctl *controls; + size_t ncontrols; }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_capture_level[] = { - { .name = "-5 dB", .wValue = 0x0300, .wIndex = 0x8003 }, - { .name = "-10 dB", .wValue = 0x0200, .wIndex = 0x8003 }, - { .name = "-15 dB", .wValue = 0x0100, .wIndex = 0x8003 }, - { .name = "-19 dB", .wValue = 0x0000, .wIndex = 0x8003 } +struct snd_djm_ctl { + const char *name; + const u16 *options; + size_t noptions; + u16 default_value; + u16 wIndex; }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_capture_ch12[] = { - { .name = "CH1 Control Tone PHONO", .wValue = 0x0103, .wIndex = 0x8002 }, - { .name = "CH1 Control Tone LINE", .wValue = 0x0100, .wIndex = 0x8002 }, - { .name = "Post CH1 Fader", .wValue = 0x0106, .wIndex = 0x8002 }, - { .name = "Cross Fader A", .wValue = 0x0107, .wIndex = 0x8002 }, - { .name = "Cross Fader B", .wValue = 0x0108, .wIndex = 0x8002 }, - { .name = "MIC", .wValue = 0x0109, .wIndex = 0x8002 }, - { .name = "AUX", .wValue = 0x010d, .wIndex = 0x8002 }, - { .name = "REC OUT", .wValue = 0x010a, .wIndex = 0x8002 } +static const char *snd_djm_get_label_caplevel(u16 wvalue) +{ + switch (wvalue) { + case 0x0000: return "-19dB"; + case 0x0100: return "-15dB"; + case 0x0200: return "-10dB"; + case 0x0300: return "-5dB"; + default: return NULL; + } }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_capture_ch34[] = { - { .name = "CH2 Control Tone PHONO", .wValue = 0x0203, .wIndex = 0x8002 }, - { .name = "CH2 Control Tone LINE", .wValue = 0x0200, .wIndex = 0x8002 }, - { .name = "Post CH2 Fader", .wValue = 0x0206, .wIndex = 0x8002 }, - { .name = "Cross Fader A", .wValue = 0x0207, .wIndex = 0x8002 }, - { .name = "Cross Fader B", .wValue = 0x0208, .wIndex = 0x8002 }, - { .name = "MIC", .wValue = 0x0209, .wIndex = 0x8002 }, - { .name = "AUX", .wValue = 0x020d, .wIndex = 0x8002 }, - { .name = "REC OUT", .wValue = 0x020a, .wIndex = 0x8002 } +static const char *snd_djm_get_label_cap(u16 wvalue) +{ + switch (wvalue & 0x00ff) { + case SND_DJM_CAP_LINE: return "Control Tone LINE"; + case SND_DJM_CAP_CDLINE: return "Control Tone CD/LINE"; + case SND_DJM_CAP_DIGITAL: return "Control Tone DIGITAL"; + case SND_DJM_CAP_PHONO: return "Control Tone PHONO"; + case SND_DJM_CAP_PFADER: return "Post Fader"; + case SND_DJM_CAP_XFADERA: return "Cross Fader A"; + case SND_DJM_CAP_XFADERB: return "Cross Fader B"; + case SND_DJM_CAP_MIC: return "Mic"; + case SND_DJM_CAP_RECOUT: return "Rec Out"; + case SND_DJM_CAP_AUX: return "Aux"; + case SND_DJM_CAP_NONE: return "None"; + case SND_DJM_CAP_CH1PFADER: return "Post Fader Ch1"; + case SND_DJM_CAP_CH2PFADER: return "Post Fader Ch2"; + case SND_DJM_CAP_CH3PFADER: return "Post Fader Ch3"; + case SND_DJM_CAP_CH4PFADER: return "Post Fader Ch4"; + default: return NULL; + } }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_capture_ch56[] = { - { .name = "REC OUT", .wValue = 0x030a, .wIndex = 0x8002 }, - { .name = "Post CH1 Fader", .wValue = 0x0311, .wIndex = 0x8002 }, - { .name = "Post CH2 Fader", .wValue = 0x0312, .wIndex = 0x8002 }, - { .name = "Cross Fader A", .wValue = 0x0307, .wIndex = 0x8002 }, - { .name = "Cross Fader B", .wValue = 0x0308, .wIndex = 0x8002 }, - { .name = "MIC", .wValue = 0x0309, .wIndex = 0x8002 }, - { .name = "AUX", .wValue = 0x030d, .wIndex = 0x8002 } +static const char *snd_djm_get_label_pb(u16 wvalue) +{ + switch (wvalue & 0x00ff) { + case SND_DJM_PB_CH1: return "Ch1"; + case SND_DJM_PB_CH2: return "Ch2"; + case SND_DJM_PB_AUX: return "Aux"; + default: return NULL; + } }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_playback_12[] = { - { .name = "CH1", .wValue = 0x0100, .wIndex = 0x8016 }, - { .name = "CH2", .wValue = 0x0101, .wIndex = 0x8016 }, - { .name = "AUX", .wValue = 0x0104, .wIndex = 0x8016 } +static const char *snd_djm_get_label(u16 wvalue, u16 windex) +{ + switch (windex) { + case SND_DJM_WINDEX_CAPLVL: return snd_djm_get_label_caplevel(wvalue); + case SND_DJM_WINDEX_CAP: return snd_djm_get_label_cap(wvalue); + case SND_DJM_WINDEX_PB: return snd_djm_get_label_pb(wvalue); + default: return NULL; + } }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_playback_34[] = { - { .name = "CH1", .wValue = 0x0200, .wIndex = 0x8016 }, - { .name = "CH2", .wValue = 0x0201, .wIndex = 0x8016 }, - { .name = "AUX", .wValue = 0x0204, .wIndex = 0x8016 } + +// DJM-250MK2 +static const u16 snd_djm_opts_cap_level[] = { + 0x0000, 0x0100, 0x0200, 0x0300 }; + +static const u16 snd_djm_opts_250mk2_cap1[] = { + 0x0103, 0x0100, 0x0106, 0x0107, 0x0108, 0x0109, 0x010d, 0x010a }; + +static const u16 snd_djm_opts_250mk2_cap2[] = { + 0x0203, 0x0200, 0x0206, 0x0207, 0x0208, 0x0209, 0x020d, 0x020a }; + +static const u16 snd_djm_opts_250mk2_cap3[] = { + 0x030a, 0x0311, 0x0312, 0x0307, 0x0308, 0x0309, 0x030d }; + +static const u16 snd_djm_opts_250mk2_pb1[] = { 0x0100, 0x0101, 0x0104 }; +static const u16 snd_djm_opts_250mk2_pb2[] = { 0x0200, 0x0201, 0x0204 }; +static const u16 snd_djm_opts_250mk2_pb3[] = { 0x0300, 0x0301, 0x0304 }; + +static const struct snd_djm_ctl snd_djm_ctls_250mk2[] = { + SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL), + SND_DJM_CTL("Ch1 Input", 250mk2_cap1, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch2 Input", 250mk2_cap2, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch3 Input", 250mk2_cap3, 0, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch1 Output", 250mk2_pb1, 0, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch2 Output", 250mk2_pb2, 1, SND_DJM_WINDEX_PB), + SND_DJM_CTL("Ch3 Output", 250mk2_pb3, 2, SND_DJM_WINDEX_PB) }; -static const struct snd_pioneer_djm_option snd_pioneer_djm_options_playback_56[] = { - { .name = "CH1", .wValue = 0x0300, .wIndex = 0x8016 }, - { .name = "CH2", .wValue = 0x0301, .wIndex = 0x8016 }, - { .name = "AUX", .wValue = 0x0304, .wIndex = 0x8016 } + +// DJM-750 +static const u16 snd_djm_opts_750_cap1[] = { + 0x0101, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a, 0x010f }; +static const u16 snd_djm_opts_750_cap2[] = { + 0x0200, 0x0201, 0x0206, 0x0207, 0x0208, 0x0209, 0x020a, 0x020f }; +static const u16 snd_djm_opts_750_cap3[] = { + 0x0300, 0x0301, 0x0306, 0x0307, 0x0308, 0x0309, 0x030a, 0x030f }; +static const u16 snd_djm_opts_750_cap4[] = { + 0x0401, 0x0403, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a, 0x040f }; + +static const struct snd_djm_ctl snd_djm_ctls_750[] = { + SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL), + SND_DJM_CTL("Ch1 Input", 750_cap1, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch2 Input", 750_cap2, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch3 Input", 750_cap3, 0, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch4 Input", 750_cap4, 0, SND_DJM_WINDEX_CAP) }; -struct snd_pioneer_djm_option_group { - const char *name; - const struct snd_pioneer_djm_option *options; - const size_t count; - const u16 default_value; + +// DJM-900NXS2 +static const u16 snd_djm_opts_900nxs2_cap1[] = { + 0x0100, 0x0102, 0x0103, 0x0106, 0x0107, 0x0108, 0x0109, 0x010a }; +static const u16 snd_djm_opts_900nxs2_cap2[] = { + 0x0200, 0x0202, 0x0203, 0x0206, 0x0207, 0x0208, 0x0209, 0x020a }; +static const u16 snd_djm_opts_900nxs2_cap3[] = { + 0x0300, 0x0302, 0x0303, 0x0306, 0x0307, 0x0308, 0x0309, 0x030a }; +static const u16 snd_djm_opts_900nxs2_cap4[] = { + 0x0400, 0x0402, 0x0403, 0x0406, 0x0407, 0x0408, 0x0409, 0x040a }; +static const u16 snd_djm_opts_900nxs2_cap5[] = { + 0x0507, 0x0508, 0x0509, 0x050a, 0x0511, 0x0512, 0x0513, 0x0514 }; + +static const struct snd_djm_ctl snd_djm_ctls_900nxs2[] = { + SND_DJM_CTL("Capture Level", cap_level, 0, SND_DJM_WINDEX_CAPLVL), + SND_DJM_CTL("Ch1 Input", 900nxs2_cap1, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch2 Input", 900nxs2_cap2, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch3 Input", 900nxs2_cap3, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch4 Input", 900nxs2_cap4, 2, SND_DJM_WINDEX_CAP), + SND_DJM_CTL("Ch5 Input", 900nxs2_cap5, 3, SND_DJM_WINDEX_CAP) }; -#define snd_pioneer_djm_option_group_item(_name, suffix, _default_value) { \ - .name = _name, \ - .options = snd_pioneer_djm_options_##suffix, \ - .count = ARRAY_SIZE(snd_pioneer_djm_options_##suffix), \ - .default_value = _default_value } - -static const struct snd_pioneer_djm_option_group snd_pioneer_djm_option_groups[] = { - snd_pioneer_djm_option_group_item("Master Capture Level Capture Switch", capture_level, 0), - snd_pioneer_djm_option_group_item("Capture 1-2 Capture Switch", capture_ch12, 2), - snd_pioneer_djm_option_group_item("Capture 3-4 Capture Switch", capture_ch34, 2), - snd_pioneer_djm_option_group_item("Capture 5-6 Capture Switch", capture_ch56, 0), - snd_pioneer_djm_option_group_item("Playback 1-2 Playback Switch", playback_12, 0), - snd_pioneer_djm_option_group_item("Playback 3-4 Playback Switch", playback_34, 1), - snd_pioneer_djm_option_group_item("Playback 5-6 Playback Switch", playback_56, 2) + +static const struct snd_djm_device snd_djm_devices[] = { + SND_DJM_DEVICE(250mk2), + SND_DJM_DEVICE(750), + SND_DJM_DEVICE(900nxs2) }; -// layout of the kcontrol->private_value: -#define SND_PIONEER_DJM_VALUE_MASK 0x0000ffff -#define SND_PIONEER_DJM_GROUP_MASK 0xffff0000 -#define SND_PIONEER_DJM_GROUP_SHIFT 16 -static int snd_pioneer_djm_controls_info(struct snd_kcontrol *kctl, struct snd_ctl_elem_info *info) +static int snd_djm_controls_info(struct snd_kcontrol *kctl, + struct snd_ctl_elem_info *info) { - u16 group_index = kctl->private_value >> SND_PIONEER_DJM_GROUP_SHIFT; - size_t count; + unsigned long private_value = kctl->private_value; + u8 device_idx = (private_value & SND_DJM_DEVICE_MASK) >> SND_DJM_DEVICE_SHIFT; + u8 ctl_idx = (private_value & SND_DJM_GROUP_MASK) >> SND_DJM_GROUP_SHIFT; + const struct snd_djm_device *device = &snd_djm_devices[device_idx]; const char *name; - const struct snd_pioneer_djm_option_group *group; + const struct snd_djm_ctl *ctl; + size_t noptions; - if (group_index >= ARRAY_SIZE(snd_pioneer_djm_option_groups)) + if (ctl_idx >= device->ncontrols) + return -EINVAL; + + ctl = &device->controls[ctl_idx]; + noptions = ctl->noptions; + if (info->value.enumerated.item >= noptions) + info->value.enumerated.item = noptions - 1; + + name = snd_djm_get_label(ctl->options[info->value.enumerated.item], + ctl->wIndex); + if (!name) return -EINVAL; - group = &snd_pioneer_djm_option_groups[group_index]; - count = group->count; - if (info->value.enumerated.item >= count) - info->value.enumerated.item = count - 1; - name = group->options[info->value.enumerated.item].name; strlcpy(info->value.enumerated.name, name, sizeof(info->value.enumerated.name)); info->type = SNDRV_CTL_ELEM_TYPE_ENUMERATED; info->count = 1; - info->value.enumerated.items = count; + info->value.enumerated.items = noptions; return 0; } -static int snd_pioneer_djm_controls_update(struct usb_mixer_interface *mixer, u16 group, u16 value) +static int snd_djm_controls_update(struct usb_mixer_interface *mixer, + u8 device_idx, u8 group, u16 value) { int err; + const struct snd_djm_device *device = &snd_djm_devices[device_idx]; - if (group >= ARRAY_SIZE(snd_pioneer_djm_option_groups) - || value >= snd_pioneer_djm_option_groups[group].count) + if ((group >= device->ncontrols) || value >= device->controls[group].noptions) return -EINVAL; err = snd_usb_lock_shutdown(mixer->chip); @@ -2748,63 +2858,76 @@ static int snd_pioneer_djm_controls_update(struct usb_mixer_interface *mixer, u1 mixer->chip->dev, usb_sndctrlpipe(mixer->chip->dev, 0), USB_REQ_SET_FEATURE, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - snd_pioneer_djm_option_groups[group].options[value].wValue, - snd_pioneer_djm_option_groups[group].options[value].wIndex, + device->controls[group].options[value], + device->controls[group].wIndex, NULL, 0); snd_usb_unlock_shutdown(mixer->chip); return err; } -static int snd_pioneer_djm_controls_get(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *elem) +static int snd_djm_controls_get(struct snd_kcontrol *kctl, + struct snd_ctl_elem_value *elem) { - elem->value.enumerated.item[0] = kctl->private_value & SND_PIONEER_DJM_VALUE_MASK; + elem->value.enumerated.item[0] = kctl->private_value & SND_DJM_VALUE_MASK; return 0; } -static int snd_pioneer_djm_controls_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *elem) +static int snd_djm_controls_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_value *elem) { struct usb_mixer_elem_list *list = snd_kcontrol_chip(kctl); struct usb_mixer_interface *mixer = list->mixer; unsigned long private_value = kctl->private_value; - u16 group = (private_value & SND_PIONEER_DJM_GROUP_MASK) >> SND_PIONEER_DJM_GROUP_SHIFT; + + u8 device = (private_value & SND_DJM_DEVICE_MASK) >> SND_DJM_DEVICE_SHIFT; + u8 group = (private_value & SND_DJM_GROUP_MASK) >> SND_DJM_GROUP_SHIFT; u16 value = elem->value.enumerated.item[0]; - kctl->private_value = (group << SND_PIONEER_DJM_GROUP_SHIFT) | value; + kctl->private_value = (((unsigned long)device << SND_DJM_DEVICE_SHIFT) | + (group << SND_DJM_GROUP_SHIFT) | + value); - return snd_pioneer_djm_controls_update(mixer, group, value); + return snd_djm_controls_update(mixer, device, group, value); } -static int snd_pioneer_djm_controls_resume(struct usb_mixer_elem_list *list) +static int snd_djm_controls_resume(struct usb_mixer_elem_list *list) { unsigned long private_value = list->kctl->private_value; - u16 group = (private_value & SND_PIONEER_DJM_GROUP_MASK) >> SND_PIONEER_DJM_GROUP_SHIFT; - u16 value = (private_value & SND_PIONEER_DJM_VALUE_MASK); + u8 device = (private_value & SND_DJM_DEVICE_MASK) >> SND_DJM_DEVICE_SHIFT; + u8 group = (private_value & SND_DJM_GROUP_MASK) >> SND_DJM_GROUP_SHIFT; + u16 value = (private_value & SND_DJM_VALUE_MASK); - return snd_pioneer_djm_controls_update(list->mixer, group, value); + return snd_djm_controls_update(list->mixer, device, group, value); } -static int snd_pioneer_djm_controls_create(struct usb_mixer_interface *mixer) +static int snd_djm_controls_create(struct usb_mixer_interface *mixer, + const u8 device_idx) { int err, i; - const struct snd_pioneer_djm_option_group *group; + u16 value; + + const struct snd_djm_device *device = &snd_djm_devices[device_idx]; + struct snd_kcontrol_new knew = { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, .index = 0, - .info = snd_pioneer_djm_controls_info, - .get = snd_pioneer_djm_controls_get, - .put = snd_pioneer_djm_controls_put + .info = snd_djm_controls_info, + .get = snd_djm_controls_get, + .put = snd_djm_controls_put }; - for (i = 0; i < ARRAY_SIZE(snd_pioneer_djm_option_groups); i++) { - group = &snd_pioneer_djm_option_groups[i]; - knew.name = group->name; - knew.private_value = (i << SND_PIONEER_DJM_GROUP_SHIFT) | group->default_value; - err = snd_pioneer_djm_controls_update(mixer, i, group->default_value); + for (i = 0; i < device->ncontrols; i++) { + value = device->controls[i].default_value; + knew.name = device->controls[i].name; + knew.private_value = ( + ((unsigned long)device_idx << SND_DJM_DEVICE_SHIFT) | + (i << SND_DJM_GROUP_SHIFT) | + value); + err = snd_djm_controls_update(mixer, device_idx, i, value); if (err) return err; - err = add_single_ctl_with_resume(mixer, 0, snd_pioneer_djm_controls_resume, + err = add_single_ctl_with_resume(mixer, 0, snd_djm_controls_resume, &knew, NULL); if (err) return err; @@ -2917,7 +3040,13 @@ int snd_usb_mixer_apply_create_quirk(struct usb_mixer_interface *mixer) err = snd_bbfpro_controls_create(mixer); break; case USB_ID(0x2b73, 0x0017): /* Pioneer DJ DJM-250MK2 */ - err = snd_pioneer_djm_controls_create(mixer); + err = snd_djm_controls_create(mixer, SND_DJM_250MK2_IDX); + break; + case USB_ID(0x08e4, 0x017f): /* Pioneer DJ DJM-750 */ + err = snd_djm_controls_create(mixer, SND_DJM_750_IDX); + break; + case USB_ID(0x2b73, 0x000a): /* Pioneer DJ DJM-900NXS2 */ + err = snd_djm_controls_create(mixer, SND_DJM_900NXS2_IDX); break; } diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c index 078bb4c94033..e5311b6bb3f6 100644 --- a/sound/usb/pcm.c +++ b/sound/usb/pcm.c @@ -270,10 +270,7 @@ static int snd_usb_pcm_sync_stop(struct snd_pcm_substream *substream) { struct snd_usb_substream *subs = substream->runtime->private_data; - if (!snd_usb_lock_shutdown(subs->stream->chip)) { - sync_pending_stops(subs); - snd_usb_unlock_shutdown(subs->stream->chip); - } + sync_pending_stops(subs); return 0; } @@ -848,13 +845,19 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs) list_for_each_entry(fp, &subs->fmt_list, list) { ep = snd_usb_get_endpoint(chip, fp->endpoint); - if (ep && ep->cur_rate) - return ep; + if (ep && ep->cur_audiofmt) { + /* if EP is already opened solely for this substream, + * we still allow us to change the parameter; otherwise + * this substream has to follow the existing parameter + */ + if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1) + return ep; + } if (!fp->implicit_fb) continue; /* for the implicit fb, check the sync ep as well */ ep = snd_usb_get_endpoint(chip, fp->sync_ep); - if (ep && ep->cur_rate) + if (ep && ep->cur_audiofmt) return ep; } return NULL; @@ -1558,7 +1561,7 @@ void snd_usb_preallocate_buffer(struct snd_usb_substream *subs) { struct snd_pcm *pcm = subs->stream->pcm; struct snd_pcm_substream *s = pcm->streams[subs->direction].substream; - struct device *dev = subs->dev->bus->controller; + struct device *dev = subs->dev->bus->sysdev; if (snd_usb_use_vmalloc) snd_pcm_set_managed_buffer(s, SNDRV_DMA_TYPE_VMALLOC, diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index c8a4bdf18207..1165a5ac60f2 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -3757,6 +3757,123 @@ AU0828_DEVICE(0x2040, 0x7270, "Hauppauge", "HVR-950Q"), } } }, +{ + /* + * Pioneer DJ DJM-750 + * 8 channels playback & 8 channels capture @ 44.1/48/96kHz S24LE + */ + USB_DEVICE_VENDOR_SPEC(0x08e4, 0x017f), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 8, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x05, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_44100| + SNDRV_PCM_RATE_48000| + SNDRV_PCM_RATE_96000, + .rate_min = 44100, + .rate_max = 96000, + .nr_rates = 3, + .rate_table = (unsigned int[]) { 44100, 48000, 96000 } + } + }, + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 8, + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x86, + .ep_idx = 1, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC| + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_44100| + SNDRV_PCM_RATE_48000| + SNDRV_PCM_RATE_96000, + .rate_min = 44100, + .rate_max = 96000, + .nr_rates = 3, + .rate_table = (unsigned int[]) { 44100, 48000, 96000 } + } + }, + { + .ifnum = -1 + } + } + } +}, +{ + /* + * Pioneer DJ DJM-450 + * PCM is 8 channels out @ 48 fixed (endpoint 0x01) + * and 8 channels in @ 48 fixed (endpoint 0x82). + */ + USB_DEVICE_VENDOR_SPEC(0x2b73, 0x0013), + .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) { + .ifnum = QUIRK_ANY_INTERFACE, + .type = QUIRK_COMPOSITE, + .data = (const struct snd_usb_audio_quirk[]) { + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 8, // outputs + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x01, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 } + } + }, + { + .ifnum = 0, + .type = QUIRK_AUDIO_FIXED_ENDPOINT, + .data = &(const struct audioformat) { + .formats = SNDRV_PCM_FMTBIT_S24_3LE, + .channels = 8, // inputs + .iface = 0, + .altsetting = 1, + .altset_idx = 1, + .endpoint = 0x82, + .ep_idx = 1, + .ep_attr = USB_ENDPOINT_XFER_ISOC| + USB_ENDPOINT_SYNC_ASYNC| + USB_ENDPOINT_USAGE_IMPLICIT_FB, + .rates = SNDRV_PCM_RATE_48000, + .rate_min = 48000, + .rate_max = 48000, + .nr_rates = 1, + .rate_table = (unsigned int[]) { 48000 } + } + }, + { + .ifnum = -1 + } + } + } +}, #undef USB_DEVICE_VENDOR_SPEC #undef USB_AUDIO_DEVICE diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index e196e364cef1..d3001fb18141 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -547,7 +547,7 @@ static int setup_disable_autosuspend(struct snd_usb_audio *chip, struct usb_driver *driver, const struct snd_usb_audio_quirk *quirk) { - driver->supports_autosuspend = 0; + usb_disable_autosuspend(interface_to_usbdev(iface)); return 1; /* Continue with creating streams and mixer */ } @@ -1470,6 +1470,23 @@ static void set_format_emu_quirk(struct snd_usb_substream *subs, subs->pkt_offset_adj = (emu_samplerate_id >= EMU_QUIRK_SR_176400HZ) ? 4 : 0; } +static int pioneer_djm_set_format_quirk(struct snd_usb_substream *subs, + u16 windex) +{ + unsigned int cur_rate = subs->data_endpoint->cur_rate; + u8 sr[3]; + // Convert to little endian + sr[0] = cur_rate & 0xff; + sr[1] = (cur_rate >> 8) & 0xff; + sr[2] = (cur_rate >> 16) & 0xff; + usb_set_interface(subs->dev, 0, 1); + // we should derive windex from fmt-sync_ep but it's not set + snd_usb_ctl_msg(subs->stream->chip->dev, + usb_sndctrlpipe(subs->stream->chip->dev, 0), + 0x01, 0x22, 0x0100, windex, &sr, 0x0003); + return 0; +} + void snd_usb_set_format_quirk(struct snd_usb_substream *subs, const struct audioformat *fmt) { @@ -1483,6 +1500,9 @@ void snd_usb_set_format_quirk(struct snd_usb_substream *subs, case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */ subs->stream_offset_adj = 2; break; + case USB_ID(0x2b73, 0x0013): /* Pioneer DJM-450 */ + pioneer_djm_set_format_quirk(subs, 0x0082); + break; } } @@ -1500,6 +1520,7 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip) case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */ case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */ case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */ + case USB_ID(0x413c, 0xa506): /* Dell AE515 sound bar */ return true; } @@ -1650,6 +1671,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe, && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) msleep(20); + /* + * Plantronics headsets (C320, C320-M, etc) need a delay to avoid + * random microhpone failures. + */ + if (USB_ID_VENDOR(chip->usb_id) == 0x047f && + (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS) + msleep(20); + /* Zoom R16/24, many Logitech(at least H650e/H570e/BCC950), * Jabra 550a, Kingston HyperX needs a tiny delay here, * otherwise requests like get/set frequency return diff --git a/sound/usb/usbaudio.h b/sound/usb/usbaudio.h index 215c1771dd57..60b9dd7df6bb 100644 --- a/sound/usb/usbaudio.h +++ b/sound/usb/usbaudio.h @@ -27,6 +27,7 @@ struct snd_usb_audio { struct snd_card *card; struct usb_interface *intf[MAX_CARD_INTERFACES]; u32 usb_id; + uint16_t quirk_type; struct mutex mutex; unsigned int system_suspend; atomic_t active; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index 7409d7860aa6..80d966cfcaa1 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size) return btf_id__add(root, id, false); } +/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */ +#ifndef SHF_COMPRESSED +#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */ +#endif + /* * The data of compressed section should be aligned to 4 * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h index 89135bb35bf7..ae5662d368b9 100644 --- a/tools/include/linux/static_call_types.h +++ b/tools/include/linux/static_call_types.h @@ -4,11 +4,13 @@ #include #include +#include #define STATIC_CALL_KEY_PREFIX __SCK__ #define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX) #define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1) #define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name) +#define STATIC_CALL_KEY_STR(name) __stringify(STATIC_CALL_KEY(name)) #define STATIC_CALL_TRAMP_PREFIX __SCT__ #define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX) @@ -32,4 +34,52 @@ struct static_call_site { s32 key; }; +#define DECLARE_STATIC_CALL(name, func) \ + extern struct static_call_key STATIC_CALL_KEY(name); \ + extern typeof(func) STATIC_CALL_TRAMP(name); + +#ifdef CONFIG_HAVE_STATIC_CALL + +#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name)) + +#ifdef CONFIG_HAVE_STATIC_CALL_INLINE + +/* + * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from + * the symbol table so that objtool can reference it when it generates the + * .static_call_sites section. + */ +#define __STATIC_CALL_ADDRESSABLE(name) \ + __ADDRESSABLE(STATIC_CALL_KEY(name)) + +#define __static_call(name) \ +({ \ + __STATIC_CALL_ADDRESSABLE(name); \ + __raw_static_call(name); \ +}) + +#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */ + +#define __STATIC_CALL_ADDRESSABLE(name) +#define __static_call(name) __raw_static_call(name) + +#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */ + +#ifdef MODULE +#define __STATIC_CALL_MOD_ADDRESSABLE(name) +#define static_call_mod(name) __raw_static_call(name) +#else +#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name) +#define static_call_mod(name) __static_call(name) +#endif + +#define static_call(name) __static_call(name) + +#else + +#define static_call(name) \ + ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func)) + +#endif /* CONFIG_HAVE_STATIC_CALL */ + #endif /* _STATIC_CALL_TYPES_H */ diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 55bd78b3496f..310f647c2d5b 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -236,7 +236,7 @@ define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ - $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' + $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef install_lib: all_cmd diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 2f9d685bd522..0911aea4cdbe 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -462,7 +462,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr) return err; case BTF_KIND_ARRAY: - return btf_dump_order_type(d, btf_array(t)->type, through_ptr); + return btf_dump_order_type(d, btf_array(t)->type, false); case BTF_KIND_STRUCT: case BTF_KIND_UNION: { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 6ae748f6ea11..8913e5e7bedb 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -883,24 +883,24 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, if (btf_is_ptr(mtype)) { struct bpf_program *prog; - mtype = skip_mods_and_typedefs(btf, mtype->type, &mtype_id); + prog = st_ops->progs[i]; + if (!prog) + continue; + kern_mtype = skip_mods_and_typedefs(kern_btf, kern_mtype->type, &kern_mtype_id); - if (!btf_is_func_proto(mtype) || - !btf_is_func_proto(kern_mtype)) { - pr_warn("struct_ops init_kern %s: non func ptr %s is not supported\n", + + /* mtype->type must be a func_proto which was + * guaranteed in bpf_object__collect_st_ops_relos(), + * so only check kern_mtype for func_proto here. + */ + if (!btf_is_func_proto(kern_mtype)) { + pr_warn("struct_ops init_kern %s: kernel member %s is not a func ptr\n", map->name, mname); return -ENOTSUP; } - prog = st_ops->progs[i]; - if (!prog) { - pr_debug("struct_ops init_kern %s: func ptr %s is not set\n", - map->name, mname); - continue; - } - prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -1180,7 +1180,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) { pr_warn("elf: failed to get section names strings from %s: %s\n", obj->path, elf_errmsg(-1)); - return -LIBBPF_ERRNO__FORMAT; + err = -LIBBPF_ERRNO__FORMAT; + goto errout; } /* Old LLVM set e_machine to EM_NONE */ diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 4dd73de00b6f..d2cb28e9ef52 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -40,7 +40,7 @@ static int libbpf_netlink_open(__u32 *nl_pid) memset(&sa, 0, sizeof(sa)); sa.nl_family = AF_NETLINK; - sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE); if (sock < 0) return -errno; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3e41ceeb1bc..06746d96742f 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -535,15 +535,16 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk) if (fd < 0) continue; + memset(&map_info, 0, map_len); err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); if (err) { close(fd); continue; } - if (!strcmp(map_info.name, "xsks_map")) { + if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) { ctx->xsks_map_fd = fd; - continue; + break; } close(fd); diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index fd4af88c0ea5..151b13d0a267 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * replacement group. */ return insn->offset == special_alt->new_off && - (insn->type == INSN_CALL || is_static_jump(insn)); + (insn->type == INSN_CALL || is_jump(insn)); } /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4bd30315eb62..5c83f73ad668 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -502,8 +502,21 @@ static int create_static_call_sections(struct objtool_file *file) key_sym = find_symbol_by_name(file->elf, tmp); if (!key_sym) { - WARN("static_call: can't find static_call_key symbol: %s", tmp); - return -1; + if (!module) { + WARN("static_call: can't find static_call_key symbol: %s", tmp); + return -1; + } + + /* + * For modules(), the key might not be exported, which + * means the module can make static calls but isn't + * allowed to change them. + * + * In that case we temporarily set the key to be the + * trampoline address. This is fixed up in + * static_call_add_module(). + */ + key_sym = insn->call_dest; } free(key_name); @@ -789,7 +802,8 @@ static int add_jump_destinations(struct objtool_file *file) dest_sec = reloc->sym->sec; dest_off = reloc->sym->sym.st_value + arch_dest_reloc_offset(reloc->addend); - } else if (strstr(reloc->sym->name, "_indirect_thunk_")) { + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || + !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. @@ -849,8 +863,8 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn->func->name, ".cold.") && - strstr(insn->jump_dest->func->name, ".cold.")) { + if (!strstr(insn->func->name, ".cold") && + strstr(insn->jump_dest->func->name, ".cold")) { insn->func->cfunc = insn->jump_dest->func; insn->jump_dest->func->pfunc = insn->func; @@ -2592,15 +2606,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_STD: - if (state.df) + if (state.df) { WARN_FUNC("recursive STD", sec, insn->offset); + return 1; + } state.df = true; break; case INSN_CLD: - if (!state.df && func) + if (!state.df && func) { WARN_FUNC("redundant CLD", sec, insn->offset); + return 1; + } state.df = false; break; diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 5ec00a4b891b..2804848e628e 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -54,6 +54,17 @@ static inline bool is_static_jump(struct instruction *insn) insn->type == INSN_JUMP_UNCONDITIONAL; } +static inline bool is_dynamic_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL; +} + +static inline bool is_jump(struct instruction *insn) +{ + return is_static_jump(insn) || is_dynamic_jump(insn); +} + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 62f3deb1d3a8..e41a8f9b99d2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -600,7 +600,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index fd3911650612..51e593e896ea 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1663,7 +1663,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) status = -1; goto out_delete_session; } - err = evlist__add_pollfd(rec->evlist, done_fd); + err = evlist__add_wakeup_eventfd(rec->evlist, done_fd); if (err < 0) { pr_err("Failed to add wakeup eventfd to poll list\n"); status = err; diff --git a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json index 40010a8724b3..ce6e7e796057 100644 --- a/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json +++ b/tools/perf/pmu-events/arch/arm64/ampere/emag/cache.json @@ -114,7 +114,7 @@ "PublicDescription": "Level 2 access to instruciton TLB that caused a page table walk. This event counts on any instruciton access which causes L2I_TLB_REFILL to count", "EventCode": "0x35", "EventName": "L2I_TLB_ACCESS", - "BriefDescription": "L2D TLB access" + "BriefDescription": "L2I TLB access" }, { "PublicDescription": "Branch target buffer misprediction", diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 2393916f6128..92869eea5dbd 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -196,7 +196,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .data = {1, -1ULL, 211, 212, 213}, }; u64 regs[64]; - const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL}; + const u32 raw_data[] = {0x12345678, 0x0a0b0c0d, 0x11020304, 0x05060708, 0 }; const u64 data[] = {0x2211443366558877ULL, 0, 0xaabbccddeeff4321ULL}; const u64 aux_data[] = {0xa55a, 0, 0xeeddee, 0x0282028202820282}; struct perf_sample sample = { diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a60878498139..2723082f3817 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -298,10 +298,6 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues, queue->set = true; queue->tid = buffer->tid; queue->cpu = buffer->cpu; - } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) { - pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n", - queue->cpu, queue->tid, buffer->cpu, buffer->tid); - return -EINVAL; } buffer->buffer_nr = queues->next_buffer_nr++; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 5dff7e489921..f24ab4585553 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -161,7 +161,7 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup) /* helper function for ftw() in match_cgroups and list_cgroups */ static int add_cgroup_name(const char *fpath, const struct stat *sb __maybe_unused, - int typeflag) + int typeflag, struct FTW *ftwbuf __maybe_unused) { struct cgroup_name *cn; @@ -209,12 +209,12 @@ static int list_cgroups(const char *str) if (!s) return -1; /* pretend if it's added by ftw() */ - ret = add_cgroup_name(s, NULL, FTW_D); + ret = add_cgroup_name(s, NULL, FTW_D, NULL); free(s); if (ret) return -1; } else { - if (add_cgroup_name("", NULL, FTW_D) < 0) + if (add_cgroup_name("", NULL, FTW_D, NULL) < 0) return -1; } @@ -247,7 +247,7 @@ static int match_cgroups(const char *str) prefix_len = strlen(mnt); /* collect all cgroups in the cgroup_list */ - if (ftw(mnt, add_cgroup_name, 20) < 0) + if (nftw(mnt, add_cgroup_name, 20, 0) < 0) return -1; for (;;) { diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 05616d4138a9..7e440fa90c93 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -673,6 +673,8 @@ int machine__resolve(struct machine *machine, struct addr_location *al, } al->sym = map__find_symbol(al->map, al->addr); + } else if (symbol_conf.dso_list) { + al->filtered |= (1 << HIST_FILTER__DSO); } if (symbol_conf.sym_list) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 05363a7247c4..fea4c1e8010d 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -572,6 +572,14 @@ int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask) return perf_evlist__filter_pollfd(&evlist->core, revents_and_mask); } +#ifdef HAVE_EVENTFD_SUPPORT +int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd) +{ + return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, + fdarray_flag__nonfilterable); +} +#endif + int evlist__poll(struct evlist *evlist, int timeout) { return perf_evlist__poll(&evlist->core, timeout); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 1aae75895dea..6d4d62151bc8 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -142,6 +142,10 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char int evlist__add_pollfd(struct evlist *evlist, int fd); int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask); +#ifdef HAVE_EVENTFD_SUPPORT +int evlist__add_wakeup_eventfd(struct evlist *evlist, int fd); +#endif + int evlist__poll(struct evlist *evlist, int timeout); struct evsel *evlist__id2evsel(struct evlist *evlist, u64 id); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 697513f35154..197eb58a39cb 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -24,6 +24,13 @@ #include "intel-pt-decoder.h" #include "intel-pt-log.h" +#define BITULL(x) (1ULL << (x)) + +/* IA32_RTIT_CTL MSR bits */ +#define INTEL_PT_CYC_ENABLE BITULL(1) +#define INTEL_PT_CYC_THRESHOLD (BITULL(22) | BITULL(21) | BITULL(20) | BITULL(19)) +#define INTEL_PT_CYC_THRESHOLD_SHIFT 19 + #define INTEL_PT_BLK_SIZE 1024 #define BIT63 (((uint64_t)1 << 63)) @@ -167,6 +174,8 @@ struct intel_pt_decoder { uint64_t sample_tot_cyc_cnt; uint64_t base_cyc_cnt; uint64_t cyc_cnt_timestamp; + uint64_t ctl; + uint64_t cyc_threshold; double tsc_to_cyc; bool continuous_period; bool overflow; @@ -204,6 +213,14 @@ static uint64_t intel_pt_lower_power_of_2(uint64_t x) return x << i; } +static uint64_t intel_pt_cyc_threshold(uint64_t ctl) +{ + if (!(ctl & INTEL_PT_CYC_ENABLE)) + return 0; + + return (ctl & INTEL_PT_CYC_THRESHOLD) >> INTEL_PT_CYC_THRESHOLD_SHIFT; +} + static void intel_pt_setup_period(struct intel_pt_decoder *decoder) { if (decoder->period_type == INTEL_PT_PERIOD_TICKS) { @@ -245,12 +262,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) decoder->flags = params->flags; + decoder->ctl = params->ctl; decoder->period = params->period; decoder->period_type = params->period_type; decoder->max_non_turbo_ratio = params->max_non_turbo_ratio; decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio; + decoder->cyc_threshold = intel_pt_cyc_threshold(decoder->ctl); + intel_pt_setup_period(decoder); decoder->mtc_shift = params->mtc_period; @@ -1761,6 +1781,9 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder) break; case INTEL_PT_CYC: + intel_pt_calc_cyc_timestamp(decoder); + break; + case INTEL_PT_VMCS: case INTEL_PT_MNT: case INTEL_PT_PAD: @@ -2014,6 +2037,7 @@ static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, in static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) { + int last_packet_type = INTEL_PT_PAD; bool no_tip = false; int err; @@ -2022,6 +2046,12 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder) if (err) return err; next: + if (decoder->cyc_threshold) { + if (decoder->sample_cyc && last_packet_type != INTEL_PT_CYC) + decoder->sample_cyc = false; + last_packet_type = decoder->packet.type; + } + if (decoder->hop) { switch (intel_pt_hop_trace(decoder, &no_tip, &err)) { case HOP_IGNORE: @@ -2811,9 +2841,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) } if (intel_pt_sample_time(decoder->pkt_state)) { intel_pt_update_sample_time(decoder); - if (decoder->sample_cyc) + if (decoder->sample_cyc) { decoder->sample_tot_cyc_cnt = decoder->tot_cyc_cnt; + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; + decoder->sample_cyc = false; + } } + /* + * When using only TSC/MTC to compute cycles, IPC can be + * sampled as soon as the cycle count changes. + */ + if (!decoder->have_cyc) + decoder->state.flags |= INTEL_PT_SAMPLE_IPC; } decoder->state.timestamp = decoder->sample_timestamp; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h index 8645fc265481..48adaa78acfc 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h @@ -17,6 +17,7 @@ #define INTEL_PT_ABORT_TX (1 << 1) #define INTEL_PT_ASYNC (1 << 2) #define INTEL_PT_FUP_IP (1 << 3) +#define INTEL_PT_SAMPLE_IPC (1 << 4) enum intel_pt_sample_type { INTEL_PT_BRANCH = 1 << 0, @@ -243,6 +244,7 @@ struct intel_pt_params { void *data; bool return_compression; bool branch_enable; + uint64_t ctl; uint64_t period; enum intel_pt_period_type period_type; unsigned max_non_turbo_ratio; diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 60214de42f31..2fff6f760457 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -893,6 +893,18 @@ static bool intel_pt_sampling_mode(struct intel_pt *pt) return false; } +static u64 intel_pt_ctl(struct intel_pt *pt) +{ + struct evsel *evsel; + u64 config; + + evlist__for_each_entry(pt->session->evlist, evsel) { + if (intel_pt_get_config(pt, &evsel->core.attr, &config)) + return config; + } + return 0; +} + static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns) { u64 quot, rem; @@ -1026,6 +1038,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt, params.data = ptq; params.return_compression = intel_pt_return_compression(pt); params.branch_enable = intel_pt_branch_enable(pt); + params.ctl = intel_pt_ctl(pt); params.max_non_turbo_ratio = pt->max_non_turbo_ratio; params.mtc_period = intel_pt_mtc_period(pt); params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n; @@ -1381,7 +1394,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq) sample.branch_stack = (struct branch_stack *)&dummy_bs; } - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_br_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_br_insn_cnt; ptq->last_br_insn_cnt = ptq->ipc_insn_cnt; @@ -1431,7 +1445,8 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq) else sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt; - sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; + if (ptq->state->flags & INTEL_PT_SAMPLE_IPC) + sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt; if (sample.cyc_cnt) { sample.insn_cnt = ptq->ipc_insn_cnt - ptq->last_in_insn_cnt; ptq->last_in_insn_cnt = ptq->ipc_insn_cnt; @@ -1966,14 +1981,8 @@ static int intel_pt_sample(struct intel_pt_queue *ptq) ptq->have_sample = false; - if (ptq->state->tot_cyc_cnt > ptq->ipc_cyc_cnt) { - /* - * Cycle count and instruction count only go together to create - * a valid IPC ratio when the cycle count changes. - */ - ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; - ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; - } + ptq->ipc_insn_cnt = ptq->state->tot_insn_cnt; + ptq->ipc_cyc_cnt = ptq->state->tot_cyc_cnt; /* * Do PEBS first to allow for the possibility that the PEBS timestamp diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 80907bc32683..f564b210d761 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3033,7 +3033,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__MEMORY) + if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; return __sort_dimension__add_output(list, sd); @@ -3045,7 +3045,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; return __sort_dimension__add_output(list, sd); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 64a039cbba1b..7dcf3327c5f7 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1561,12 +1561,11 @@ static int bfd2elf_binding(asymbol *symbol) int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) { int err = -1; - long symbols_size, symbols_count; + long symbols_size, symbols_count, i; asection *section; asymbol **symbols, *sym; struct symbol *symbol; bfd *abfd; - u_int i; u64 start, len; abfd = bfd_openr(dso->long_name, NULL); @@ -1867,8 +1866,10 @@ int dso__load(struct dso *dso, struct map *map) if (nsexit) nsinfo__mountns_enter(dso->nsinfo, &nsc); - if (bfdrc == 0) + if (bfdrc == 0) { + ret = 0; break; + } if (!is_reg || sirc < 0) continue; diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c index 2947e3f3c6d9..dda0a6a3173d 100644 --- a/tools/perf/util/synthetic-events.c +++ b/tools/perf/util/synthetic-events.c @@ -384,7 +384,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, while (!io.eof) { static const char anonstr[] = "//anon"; - size_t size; + size_t size, aligned_size; /* ensure null termination since stack will be reused. */ event->mmap2.filename[0] = '\0'; @@ -444,11 +444,12 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, } size = strlen(event->mmap2.filename) + 1; - size = PERF_ALIGN(size, sizeof(u64)); + aligned_size = PERF_ALIGN(size, sizeof(u64)); event->mmap2.len -= event->mmap.start; event->mmap2.header.size = (sizeof(event->mmap2) - - (sizeof(event->mmap2.filename) - size)); - memset(event->mmap2.filename + size, 0, machine->id_hdr_size); + (sizeof(event->mmap2.filename) - aligned_size)); + memset(event->mmap2.filename + size, 0, machine->id_hdr_size + + (aligned_size - size)); event->mmap2.header.size += machine->id_hdr_size; event->mmap2.pid = tgid; event->mmap2.tid = pid; diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f507dff713c9..8a01af783310 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent) pr_debug("error reading saved cmdlines\n"); goto out; } + buf[ret] = '\0'; parse_saved_cmdline(pevent, buf, size); ret = 0; diff --git a/tools/perf/util/unwind-libdw.c b/tools/perf/util/unwind-libdw.c index 0ada907c60d4..a74b517f7497 100644 --- a/tools/perf/util/unwind-libdw.c +++ b/tools/perf/util/unwind-libdw.c @@ -60,10 +60,8 @@ static int __report_module(struct addr_location *al, u64 ip, mod = dwfl_addrmodule(ui->dwfl, ip); if (mod) { Dwarf_Addr s; - void **userdatap; - dwfl_module_info(mod, &userdatap, &s, NULL, NULL, NULL, NULL, NULL); - *userdatap = dso; + dwfl_module_info(mod, NULL, &s, NULL, NULL, NULL, NULL, NULL); if (s != al->map->start - al->map->pgoff) mod = 0; } @@ -79,6 +77,13 @@ static int __report_module(struct addr_location *al, u64 ip, al->map->start - al->map->pgoff, false); } + if (mod) { + void **userdatap; + + dwfl_module_info(mod, &userdatap, NULL, NULL, NULL, NULL, NULL, NULL); + *userdatap = dso; + } + return mod && dwfl_addrmodule(ui->dwfl, ip) == mod ? 0 : -1; } diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config index a7f0603d33f6..690870043ac0 100644 --- a/tools/testing/kunit/configs/broken_on_uml.config +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -40,3 +40,5 @@ # CONFIG_RESET_BRCMSTB_RESCAL is not set # CONFIG_RESET_INTEL_GW is not set # CONFIG_ADI_AXI_ADC is not set +# CONFIG_DEBUG_PAGEALLOC is not set +# CONFIG_PAGE_POISONING is not set diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index b593f4448e83..9a036e9d4455 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -288,19 +288,17 @@ def __eq__(self, other): class KUnitMainTest(unittest.TestCase): def setUp(self): path = get_absolute_path('test_data/test_is_test_passed-all_passed.log') - file = open(path) - all_passed_log = file.readlines() - self.print_patch = mock.patch('builtins.print') - self.print_mock = self.print_patch.start() + with open(path) as file: + all_passed_log = file.readlines() + + self.print_mock = mock.patch('builtins.print').start() + self.addCleanup(mock.patch.stopall) + self.linux_source_mock = mock.Mock() self.linux_source_mock.build_reconfig = mock.Mock(return_value=True) self.linux_source_mock.build_um_kernel = mock.Mock(return_value=True) self.linux_source_mock.run_kernel = mock.Mock(return_value=all_passed_log) - def tearDown(self): - self.print_patch.stop() - pass - def test_config_passes_args_pass(self): kunit.main(['config', '--build_dir=.kunit'], self.linux_source_mock) assert self.linux_source_mock.build_reconfig.call_count == 1 diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 71c960dcd8a4..652254754b4c 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -55,7 +55,6 @@ int main(void) struct test *test, tests[] = { { -EINVAL, 1, pfn(0), NULL, PAGE_SIZE, 0, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, PAGE_SIZE + 1, 1 }, - { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax + 1, 1 }, { 0, 1, pfn(0), NULL, PAGE_SIZE, sgmax, 1 }, { 0, 1, pfn(0), NULL, 1, sgmax, 1 }, { 0, 2, pfn(0, 1), NULL, 2 * PAGE_SIZE, sgmax, 1 }, diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index b2282be6f938..612d3899614a 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -332,5 +332,5 @@ int main(void) ksft_print_cnts(); - return 0; + return ret; } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 76ebe4c250f1..eb90a6b8850d 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -20,39 +20,6 @@ static __u32 bpf_map_id(struct bpf_map *map) return info.id; } -/* - * Trigger synchronize_rcu() in kernel. - * - * ARRAY_OF_MAPS/HASH_OF_MAPS lookup/update operations trigger synchronize_rcu() - * if looking up an existing non-NULL element or updating the map with a valid - * inner map FD. Use this fact to trigger synchronize_rcu(): create map-in-map, - * create a trivial ARRAY map, update map-in-map with ARRAY inner map. Then - * cleanup. At the end, at least one synchronize_rcu() would be called. - */ -static int kern_sync_rcu(void) -{ - int inner_map_fd, outer_map_fd, err, zero = 0; - - inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 4, 1, 0); - if (CHECK(inner_map_fd < 0, "inner_map_create", "failed %d\n", -errno)) - return -1; - - outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL, - sizeof(int), inner_map_fd, 1, 0); - if (CHECK(outer_map_fd < 0, "outer_map_create", "failed %d\n", -errno)) { - close(inner_map_fd); - return -1; - } - - err = bpf_map_update_elem(outer_map_fd, &zero, &inner_map_fd, 0); - if (err) - err = -errno; - CHECK(err, "outer_map_update", "failed %d\n", err); - close(inner_map_fd); - close(outer_map_fd); - return err; -} - static void test_lookup_update(void) { int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c new file mode 100644 index 000000000000..6c4d42a2386f --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "fexit_sleep.skel.h" + +static int do_sleep(void *skel) +{ + struct fexit_sleep *fexit_skel = skel; + struct timespec ts1 = { .tv_nsec = 1 }; + struct timespec ts2 = { .tv_sec = 10 }; + + fexit_skel->bss->pid = getpid(); + (void)syscall(__NR_nanosleep, &ts1, NULL); + (void)syscall(__NR_nanosleep, &ts2, NULL); + return 0; +} + +#define STACK_SIZE (1024 * 1024) +static char child_stack[STACK_SIZE]; + +void test_fexit_sleep(void) +{ + struct fexit_sleep *fexit_skel = NULL; + int wstatus, duration = 0; + pid_t cpid; + int err, fexit_cnt; + + fexit_skel = fexit_sleep__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) + goto cleanup; + + err = fexit_sleep__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) + goto cleanup; + + cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel); + if (CHECK(cpid == -1, "clone", strerror(errno))) + goto cleanup; + + /* wait until first sys_nanosleep ends and second sys_nanosleep starts */ + while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2); + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + + /* close progs and detach them. That will trigger two nop5->jmp5 rewrites + * in the trampolines to skip nanosleep_fexit prog. + * The nanosleep_fentry prog will get detached first. + * The nanosleep_fexit prog will get detached second. + * Detaching will trigger freeing of both progs JITed images. + * There will be two dying bpf_tramp_image-s, but only the initial + * bpf_tramp_image (with both _fentry and _fexit progs will be stuck + * waiting for percpu_ref_kill to confirm). The other one + * will be freed quickly. + */ + close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry)); + close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit)); + fexit_sleep__detach(fexit_skel); + + /* kill the thread to unwind sys_nanosleep stack through the trampoline */ + kill(cpid, 9); + + if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno))) + goto cleanup; + if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed")) + goto cleanup; + + /* The bypassed nanosleep_fexit prog shouldn't have executed. + * Unlike progs the maps were not freed and directly accessible. + */ + fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt); + if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt)) + goto cleanup; + +cleanup: + fexit_sleep__destroy(fexit_skel); +} diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c new file mode 100644 index 000000000000..03a672d76353 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021 Facebook */ +#include "vmlinux.h" +#include +#include + +char LICENSE[] SEC("license") = "GPL"; + +int pid = 0; +int fentry_cnt = 0; +int fexit_cnt = 0; + +SEC("fentry/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fentry_cnt++; + return 0; +} + +SEC("fexit/__x64_sys_nanosleep") +int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret) +{ + if ((int)bpf_get_current_pid_tgid() != pid) + return 0; + + fexit_cnt++; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c index 6b670039ea67..1d8918dfbd3f 100644 --- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -16,6 +16,13 @@ bool skip = false; #define STRSIZE 2048 #define EXPECTED_STRSIZE 256 +#if defined(bpf_target_s390) +/* NULL points to a readable struct lowcore on s390, so take the last page */ +#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL) +#else +#define BADPTR 0 +#endif + #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif @@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb) } /* Check invalid ptr value */ - p.ptr = 0; + p.ptr = BADPTR; __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); if (__ret >= 0) { - bpf_printk("printing NULL should generate error, got (%d)", - __ret); + bpf_printk("printing %llx should generate error, got (%d)", + (unsigned long long)BADPTR, __ret); ret = -ERANGE; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index a621b58ab079..ba6eadfec565 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb) } ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } + if (ret < 0) + gopt.opt_class = 0; bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4, gopt.opt_class); @@ -510,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb) } ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); - if (ret < 0) { - ERROR(ret); - return TC_ACT_SHOT; - } + if (ret < 0) + gopt.opt_class = 0; bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4, gopt.opt_class); diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 7d077d48cadd..6396932b97e2 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -11,6 +11,7 @@ #include #include #include /* backtrace */ +#include #define EXIT_NO_TEST 2 #define EXIT_ERR_SETUP_INFRA 3 @@ -370,8 +371,18 @@ static int delete_module(const char *name, int flags) return syscall(__NR_delete_module, name, flags); } +/* + * Trigger synchronize_rcu() in kernel. + */ +int kern_sync_rcu(void) +{ + return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); +} + static void unload_bpf_testmod(void) { + if (kern_sync_rcu()) + fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n"); if (delete_module("bpf_testmod", 0)) { if (errno == ENOENT) { if (env.verbosity > VERBOSE_NONE) @@ -379,7 +390,7 @@ static void unload_bpf_testmod(void) return; } fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); - exit(1); + return; } if (env.verbosity > VERBOSE_NONE) fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 115953243f62..e49e2fdde942 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -219,6 +219,7 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); int extract_build_id(char *build_id, size_t size); +int kern_sync_rcu(void); #ifdef __x86_64__ #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep" diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh index dd80f0c84afb..c033850886f4 100755 --- a/tools/testing/selftests/bpf/test_xdp_redirect.sh +++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # Create 2 namespaces with two veth peers, and # forward packets in-between using generic XDP # @@ -57,12 +57,8 @@ test_xdp_redirect() ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null - ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null - local ret1=$? - ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null - local ret2=$? - - if [ $ret1 -eq 0 -a $ret2 -eq 0 ]; then + if ip netns exec ns1 ping -c 1 10.1.1.22 &> /dev/null && + ip netns exec ns2 ping -c 1 10.1.1.11 &> /dev/null; then echo "selftests: test_xdp_redirect $xdpmode [PASS]"; else ret=1 diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index bed53b561e04..1b138cd2b187 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -250,12 +250,13 @@ BPF_MOV64_IMM(BPF_REG_5, 0), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .fixup_map_array_ro = { 3 }, .result = ACCEPT, - .retval = -29, + .retval = 65507, }, { "invalid write map access into a read-only array 1", diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c index 1fd07a4f27ac..c162498a64fc 100644 --- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c +++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c @@ -6,8 +6,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 2", @@ -20,6 +21,8 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .result_unpriv = REJECT, .result = ACCEPT, .retval = 1, }, @@ -31,8 +34,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 4", @@ -45,6 +49,8 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", + .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -55,8 +61,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 6", @@ -67,8 +74,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 7", @@ -80,8 +88,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -94,8 +103,9 @@ offsetof(struct __sk_buff, mark)), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .errstr = "dereference of modified ctx ptr", + .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { @@ -106,8 +116,9 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, + .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types", .errstr = "R0 tried to subtract pointer from scalar", + .result = REJECT, }, { "check deducing bounds from const, 10", @@ -119,6 +130,6 @@ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), BPF_EXIT_INSN(), }, - .result = REJECT, .errstr = "math between ctx pointer and register with unbounded min value is not allowed", + .result = REJECT, }, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b117bdd3806d..6f610cfddae5 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -75,6 +75,8 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .result = ACCEPT, }, { @@ -91,5 +93,7 @@ BPF_EXIT_INSN(), }, .fixup_map_hash_16b = { 4 }, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c index a3fe0fbaed41..2df9871b169d 100644 --- a/tools/testing/selftests/bpf/verifier/unpriv.c +++ b/tools/testing/selftests/bpf/verifier/unpriv.c @@ -496,7 +496,7 @@ .result = ACCEPT, }, { - "unpriv: adding of fp", + "unpriv: adding of fp, reg", .insns = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_MOV64_IMM(BPF_REG_1, 0), @@ -504,6 +504,19 @@ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), BPF_EXIT_INSN(), }, + .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "unpriv: adding of fp, imm", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8), + BPF_EXIT_INSN(), + }, .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result_unpriv = REJECT, .result = ACCEPT, diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index ed4e76b24649..feb91266db39 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -169,7 +169,7 @@ .fixup_map_array_48b = { 1 }, .result = ACCEPT, .result_unpriv = REJECT, - .errstr_unpriv = "R2 tried to add from different maps or paths", + .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types", .retval = 0, }, { @@ -516,6 +516,27 @@ .result = ACCEPT, .retval = 0xabcdef12, }, +{ + "map access: value_ptr += N, value_ptr -= N known scalar", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV32_IMM(BPF_REG_1, 0x12345678), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result = ACCEPT, + .retval = 0x12345678, +}, { "map access: unknown scalar += value_ptr, 1", .insns = { diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c index 1e722ee76b1f..e7945b6246c8 100644 --- a/tools/testing/selftests/bpf/xdpxceiver.c +++ b/tools/testing/selftests/bpf/xdpxceiver.c @@ -729,7 +729,6 @@ static void worker_pkt_validate(void) u32 payloadseqnum = -2; while (1) { - pkt_node_rx_q = malloc(sizeof(struct pkt)); pkt_node_rx_q = TAILQ_LAST(&head, head_s); if (!pkt_node_rx_q) break; diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile index 607c2acd2082..604b43ece15f 100644 --- a/tools/testing/selftests/dmabuf-heaps/Makefile +++ b/tools/testing/selftests/dmabuf-heaps/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include +CFLAGS += -static -O3 -Wl,-no-as-needed -Wall TEST_GEN_PROGS = dmabuf-heap diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc index ada594fe16cb..955e3ceea44b 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -1,19 +1,38 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: event trigger - test synthetic_events syntax parser errors -# requires: synthetic_events error_log +# requires: synthetic_events error_log "char name[]' >> synthetic_events":README check_error() { # command-with-error-pos-by-^ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' } +check_dyn_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events' +} + check_error 'myevent ^chr arg' # INVALID_TYPE -check_error 'myevent ^char str[];; int v' # INVALID_TYPE -check_error 'myevent char ^str]; int v' # INVALID_NAME -check_error 'myevent char ^str;[]' # INVALID_NAME -check_error 'myevent ^char str[; int v' # INVALID_TYPE -check_error '^mye;vent char str[]' # BAD_NAME -check_error 'myevent char str[]; ^int' # INVALID_FIELD -check_error '^myevent' # INCOMPLETE_CMD +check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE + +check_error 'myevent char ^str]; int v' # BAD_NAME +check_error '^mye-vent char str[]' # BAD_NAME +check_error 'myevent char ^st-r[]' # BAD_NAME + +check_error 'myevent char str;^[]' # INVALID_FIELD +check_error 'myevent char str; ^int' # INVALID_FIELD + +check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC +check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC + +check_error '^mye;vent char str[]' # INVALID_CMD +check_error '^myevent ; char str[]' # INVALID_CMD +check_error '^myevent; char str[]' # INVALID_CMD +check_error '^myevent ;char str[]' # INVALID_CMD +check_error '^; char str[]' # INVALID_CMD +check_error '^;myevent char str[]' # INVALID_CMD +check_error '^myevent' # INVALID_CMD + +check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD exit 0 diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index 197e769c2ed1..f8cda822c1ce 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -86,11 +86,20 @@ test_ip6gretap() test_gretap_stp() { + # Sometimes after mirror installation, the neighbor's state is not valid. + # The reason is that there is no SW datapath activity related to the + # neighbor for the remote GRE address. Therefore whether the corresponding + # neighbor will be valid is a matter of luck, and the test is thus racy. + # Set the neighbor's state to permanent, so it would be always valid. + ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap" } test_ip6gretap_stp() { + ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \ + nud permanent dev br2 full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index ce6bea9675c0..0ccb1dda099a 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -658,7 +658,7 @@ test_ecn_decap() # In accordance with INET_ECN_decapsulate() __test_ecn_decap 00 00 0x00 __test_ecn_decap 01 01 0x01 - __test_ecn_decap 02 01 0x02 + __test_ecn_decap 02 01 0x01 __test_ecn_decap 01 03 0x03 __test_ecn_decap 02 03 0x03 test_ecn_decap_error diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 2cfd87d94db8..e927df83efb9 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -493,7 +493,7 @@ do_transfer() echo "${listener_ns} SYNRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" fi if [ $expect_ackrx -ne $stat_ackrx_now_l ] ;then - echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_synrx}, got ${stat_synrx_now_l}" + echo "${listener_ns} ACKRX: ${cl_proto} -> ${srv_proto}: expect ${expect_ackrx}, got ${stat_ackrx_now_l} " fi if [ $retc -eq 0 ] && [ $rets -eq 0 ];then diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 7b01b7c2ec10..066efd30e294 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -30,25 +30,25 @@ struct reuse_opts { }; struct reuse_opts unreusable_opts[12] = { - {0, 0, 0, 0}, - {0, 0, 0, 1}, - {0, 0, 1, 0}, - {0, 0, 1, 1}, - {0, 1, 0, 0}, - {0, 1, 0, 1}, - {0, 1, 1, 0}, - {0, 1, 1, 1}, - {1, 0, 0, 0}, - {1, 0, 0, 1}, - {1, 0, 1, 0}, - {1, 0, 1, 1}, + {{0, 0}, {0, 0}}, + {{0, 0}, {0, 1}}, + {{0, 0}, {1, 0}}, + {{0, 0}, {1, 1}}, + {{0, 1}, {0, 0}}, + {{0, 1}, {0, 1}}, + {{0, 1}, {1, 0}}, + {{0, 1}, {1, 1}}, + {{1, 0}, {0, 0}}, + {{1, 0}, {0, 1}}, + {{1, 0}, {1, 0}}, + {{1, 0}, {1, 1}}, }; struct reuse_opts reusable_opts[4] = { - {1, 1, 0, 0}, - {1, 1, 0, 1}, - {1, 1, 1, 0}, - {1, 1, 1, 1}, + {{1, 1}, {0, 0}}, + {{1, 1}, {0, 1}}, + {{1, 1}, {1, 0}}, + {{1, 1}, {1, 1}}, }; int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 0d783e1065c8..64779f073e17 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -86,5 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -test "$failed" == 0 +test "$failed" -eq 0 exit $? diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 26c72f2b61b1..1b6c7d33c4ff 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -315,7 +315,7 @@ TEST(kcmp) ret = __filecmp(getpid(), getpid(), 1, 1); EXPECT_EQ(ret, 0); if (ret != 0 && errno == ENOSYS) - SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)"); } TEST(mode_strict_support) diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 74c69b75f6f5..7ed7cd95e58f 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } sleep() { read -t "$1" -N 1 || true; } -waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } @@ -141,6 +141,19 @@ tests() { n2 iperf3 -s -1 -B fd00::2 & waitiperf $netns2 $! n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 + + # TCP over IPv4, in parallel + for max in 4 5 50; do + local pids=( ) + for ((i=0; i < max; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) + done + for ((i=0; i < max; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" + done } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8367d88ce39b..2caba2828982 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1903,10 +1903,12 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, bool write_fault, bool *writable, kvm_pfn_t *p_pfn) { - unsigned long pfn; + kvm_pfn_t pfn; + pte_t *ptep; + spinlock_t *ptl; int r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) { /* * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does @@ -1921,14 +1923,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, if (r) return r; - r = follow_pfn(vma, addr, &pfn); + r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); if (r) return r; + } + if (write_fault && !pte_write(*ptep)) { + pfn = KVM_PFN_ERR_RO_FAULT; + goto out; } if (writable) - *writable = true; + *writable = pte_write(*ptep); + pfn = pte_pfn(*ptep); /* * Get a reference here because callers of *hva_to_pfn* and @@ -1943,6 +1950,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, */ kvm_get_pfn(pfn); +out: + pte_unmap_unlock(ptep, ptl); *p_pfn = pfn; return 0; }